Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 29%
105 statements
coverage.py v6.5.0, created at 2023-01-07 10:08 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("QueryBackend",)
25from abc import abstractmethod
26from collections.abc import Iterable, Mapping, Sequence, Set
27from typing import TYPE_CHECKING, Any, Generic, TypeVar
29from lsst.daf.relation import (
30 BinaryOperationRelation,
31 ColumnTag,
32 LeafRelation,
33 MarkerRelation,
34 Relation,
35 UnaryOperationRelation,
36)
38from ...core import (
39 DataCoordinate,
40 DatasetColumnTag,
41 DatasetType,
42 DimensionGraph,
43 DimensionKeyColumnTag,
44 DimensionRecord,
45 DimensionUniverse,
46)
47from .._collectionType import CollectionType
48from .._exceptions import DatasetTypeError, MissingDatasetTypeError
49from ..wildcards import CollectionWildcard
50from ._query_context import QueryContext
51from .find_first_dataset import FindFirstDataset
53if TYPE_CHECKING:
54 from ..interfaces import CollectionRecord
57_C = TypeVar("_C", bound=QueryContext)
60class QueryBackend(Generic[_C]):
61 """An interface for constructing and evaluating the
62 `~lsst.daf.relation.Relation` objects that comprise registry queries.
64 This ABC is expected to have a concrete subclass for each concrete registry
65 type, and most subclasses will be paired with a `QueryContext` subclass.
66 See `QueryContext` for the division of responsibilities between these two
67 interfaces.
68 """
70 @property
71 @abstractmethod
72 def universe(self) -> DimensionUniverse:
73 """Definition of all dimensions and dimension elements for this
74 registry (`DimensionUniverse`).
75 """
76 raise NotImplementedError()
78 def context(self) -> _C:
79 """Return a context manager that can be used to execute queries with
80 this backend.
82 Returns
83 -------
84 context : `QueryContext`
85 Context manager that manages state and connections needed to
86 execute queries.
87 """
88 raise NotImplementedError()
90 @abstractmethod
91 def get_collection_name(self, key: Any) -> str:
92 """Return the collection name associated with a collection primary key
93 value.
95 Parameters
96 ----------
97 key
98 Collection primary key value.
100 Returns
101 -------
102 name : `str`
103 Collection name.
104 """
105 raise NotImplementedError()
107 @abstractmethod
108 def resolve_collection_wildcard(
109 self,
110 expression: Any,
111 *,
112 collection_types: Set[CollectionType] = CollectionType.all(),
113 done: set[str] | None = None,
114 flatten_chains: bool = True,
115 include_chains: bool | None = None,
116 ) -> list[CollectionRecord]:
117 """Return the collection records that match a wildcard expression.
119 Parameters
120 ----------
121 expression
122 Names and/or patterns for collections; will be passed to
123 `CollectionWildcard.from_expression`.
124 collection_types : `collections.abc.Set` [ `CollectionType` ], optional
125 If provided, only yield collections of these types.
126 done : `set` [ `str` ], optional
127 A set of collection names that should be skipped, updated to
128 include all processed collection names on return.
129 flatten_chains : `bool`, optional
130 If `True` (default) recursively yield the child collections of
131 `~CollectionType.CHAINED` collections.
132 include_chains : `bool`, optional
133 If `True`, return records for `~CollectionType.CHAINED`
134 collections themselves. The default is the opposite of
135 ``flatten_chains``: either return records for CHAINED collections or
136 their children, but not both.
138 Returns
139 -------
140 records : `list` [ `CollectionRecord` ]
141 Matching collection records.
142 """
143 raise NotImplementedError()
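# A hedged usage sketch for ``resolve_collection_wildcard``; ``backend`` is a
# hypothetical concrete QueryBackend and the RUN-collection pattern below is
# made up.  The call signature matches the abstract method above.
def _example_resolve_collection_wildcard(backend: QueryBackend) -> list[str]:
    import re

    records = backend.resolve_collection_wildcard(
        re.compile(r"HSC/runs/.*"),
        collection_types={CollectionType.RUN},
        flatten_chains=True,
    )
    # Each record carries the matched collection's name, type, and key.
    return [record.name for record in records]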
145 @abstractmethod
146 def resolve_dataset_type_wildcard(
147 self,
148 expression: Any,
149 components: bool | None = None,
150 missing: list[str] | None = None,
151 explicit_only: bool = False,
152 components_deprecated: bool = True,
153 ) -> dict[DatasetType, list[str | None]]:
154 """Return the dataset types that match a wildcard expression.
156 Parameters
157 ----------
158 expression
159 Names and/or patterns for dataset types; will be passed to
160 `DatasetTypeWildcard.from_expression`.
161 components : `bool`, optional
162 If `True`, apply all expression patterns to component dataset type
163 names as well. If `False`, never apply patterns to components. If
164 `None` (default), apply patterns to components only if their parent
165 datasets were not matched by the expression. Fully-specified
166 component datasets (`str` or `DatasetType` instances) are always
167 included.
168 missing : `list` [ `str` ], optional
169 String dataset type names that were explicitly given (i.e. not
170 regular expression patterns) but not found will be appended to this
171 list, if it is provided.
172 explicit_only : `bool`, optional
173 If `True`, require explicit `DatasetType` instances or `str` names,
174 with `re.Pattern` instances deprecated and ``...`` prohibited.
175 components_deprecated : `bool`, optional
176 If `True`, this is a context in which component dataset support is
177 deprecated. This will result in a deprecation warning when
178 ``components=True`` or ``components=None`` and a component dataset
179 is matched. In the future this will become an error.
181 Returns
182 -------
183 dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
184 A mapping with resolved dataset types as keys and lists of
185 matched component names as values, where `None` indicates the
186 parent composite dataset type was matched.
187 """
188 raise NotImplementedError()
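# A hedged usage sketch for ``resolve_dataset_type_wildcard``; ``backend``,
# the pattern, and the dataset type name "deepCoadd" are illustrative only.
def _example_resolve_dataset_types(
    backend: QueryBackend,
) -> dict[DatasetType, list[str | None]]:
    import re

    missing: list[str] = []
    matches = backend.resolve_dataset_type_wildcard(
        [re.compile(r"calexp.*"), "deepCoadd"],
        missing=missing,
    )
    # Explicitly-named dataset types that are not registered are reported via
    # ``missing`` instead of raising.
    for name in missing:
        print(f"Dataset type {name!r} is not registered; skipping.")
    return matches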
190 def resolve_single_dataset_type_wildcard(
191 self,
192 expression: Any,
193 components: bool | None = None,
194 explicit_only: bool = False,
195 components_deprecated: bool = True,
196 ) -> tuple[DatasetType, list[str | None]]:
197 """Return a single dataset type that matches a wildcard expression.
199 Parameters
200 ----------
201 expression
202 Names and/or patterns for the dataset type; will be passed to
203 `DatasetTypeWildcard.from_expression`.
204 components : `bool`, optional
205 If `True`, apply all expression patterns to component dataset type
206 names as well. If `False`, never apply patterns to components. If
207 `None` (default), apply patterns to components only if their parent
208 datasets were not matched by the expression. Fully-specified
209 component datasets (`str` or `DatasetType` instances) are always
210 included.
211 explicit_only : `bool`, optional
212 If `True`, require explicit `DatasetType` instances or `str` names,
213 with `re.Pattern` instances deprecated and ``...`` prohibited.
214 components_deprecated : `bool`, optional
215 If `True`, this is a context in which component dataset support is
216 deprecated. This will result in a deprecation warning when
217 ``components=True`` or ``components=None`` and a component dataset
218 is matched. In the future this will become an error.
220 Returns
221 -------
222 single_parent : `DatasetType`
223 The matched parent dataset type.
224 single_components : `list` [ `str` | `None` ]
225 The matched components that correspond to this parent, or `None` if
226 the parent dataset type itself was matched.
228 Notes
229 -----
230 This method really finds a single parent dataset type and any number of
231 components, because only the parent dataset type is known to the
232 registry at all; many callers are expected to discard the
233 ``single_components`` return value.
234 """
235 missing: list[str] = []
236 matching = self.resolve_dataset_type_wildcard(
237 expression,
238 components=components,
239 missing=missing,
240 explicit_only=explicit_only,
241 components_deprecated=components_deprecated,
242 )
243 if not matching:
244 if missing:
245 raise MissingDatasetTypeError(
246 "\n".join(
247 f"Dataset type {t!r} is not registered, so no instances of it can exist."
248 for t in missing
249 )
250 )
251 else:
252 raise MissingDatasetTypeError(
253 f"No registered dataset types matched expression {expression!r}, "
254 "so no datasets will be found."
255 )
256 if len(matching) > 1:
257 raise DatasetTypeError(
258 f"Expression {expression!r} matched multiple parent dataset types: "
259 f"{[t.name for t in matching]}, but only one is allowed."
260 )
261 ((single_parent, single_components),) = matching.items()
262 if missing:
263 raise DatasetTypeError(
264 f"Expression {expression!r} appears to involve multiple dataset types, even though only "
265 f"one ({single_parent.name}) is registered, and only one is allowed here."
266 )
267 return single_parent, single_components
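# A hedged usage sketch: resolve one dataset type from a plain string name.
# ``backend`` and the name "calexp" are hypothetical; as the Notes explain,
# many callers keep only the parent dataset type.
def _example_single_dataset_type(backend: QueryBackend) -> DatasetType:
    parent, components = backend.resolve_single_dataset_type_wildcard("calexp")
    # ``components`` contains `None` when the parent type itself was matched.
    return parent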
269 @abstractmethod
270 def filter_dataset_collections(
271 self,
272 dataset_types: Iterable[DatasetType],
273 collections: Sequence[CollectionRecord],
274 *,
275 governor_constraints: Mapping[str, Set[str]],
276 rejections: list[str] | None = None,
277 ) -> dict[DatasetType, list[CollectionRecord]]:
278 """Filter a sequence of collections to those for which a dataset query
279 might succeed.
281 Parameters
282 ----------
283 dataset_types : `Iterable` [ `DatasetType` ]
284 Dataset types that are being queried. Must include only parent
285 or standalone dataset types, not components.
286 collections : `Sequence` [ `CollectionRecord` ]
287 Sequence of collections that will be searched.
288 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
290 Constraints imposed by other aspects of the query on governor
291 dimensions; collections inconsistent with these constraints will be
292 skipped.
293 rejections : `list` [ `str` ], optional
294 If not `None`, a `list` to which diagnostic messages will be
295 appended for any collection that matches ``collections`` but is not
296 returned. At least one message is guaranteed whenever the result
297 is empty.
299 Returns
300 -------
301 dataset_collections : `dict` [ `DatasetType`, \
302 `list` [ `CollectionRecord` ] ]
303 The collections to search for each dataset. The dictionary's keys
304 are always exactly ``dataset_types`` (in the same order), and each
305 nested `list` of collections is ordered consistently with the
306 given ``collections``.
308 Notes
309 -----
310 This method accepts multiple dataset types and multiple collections at
311 once to enable implementations to batch up the fetching of summary
312 information needed to relate them.
313 """
314 raise NotImplementedError()
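# A hedged usage sketch for ``filter_dataset_collections``; the governor
# constraint ("instrument" -> {"HSC"}) and all inputs are illustrative.
def _example_filter_collections(
    backend: QueryBackend,
    dataset_type: DatasetType,
    collections: Sequence[CollectionRecord],
) -> list[CollectionRecord]:
    rejections: list[str] = []
    by_dataset_type = backend.filter_dataset_collections(
        [dataset_type],
        collections,
        governor_constraints={"instrument": {"HSC"}},
        rejections=rejections,
    )
    # The keys are exactly the dataset types passed in, in the same order.
    return by_dataset_type[dataset_type]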
316 def resolve_dataset_collections(
317 self,
318 dataset_type: DatasetType,
319 collections: CollectionWildcard,
320 *,
321 governor_constraints: Mapping[str, Set[str]],
322 rejections: list[str] | None = None,
323 collection_types: Set[CollectionType] = CollectionType.all(),
324 allow_calibration_collections: bool = False,
325 ) -> list[CollectionRecord]:
326 """Resolve the sequence of collections to query for a dataset type.
328 Parameters
329 ----------
330 dataset_type : `DatasetType`
331 Dataset type to be queried in the returned collections.
332 collections : `CollectionWildcard`
333 Expression for the collections to be queried.
334 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
336 Constraints imposed by other aspects of the query on governor
337 dimensions; collections inconsistent with these constraints will be
338 skipped.
339 rejections : `list` [ `str` ], optional
340 If not `None`, a `list` to which diagnostic messages will be
341 appended for any collection that matches ``collections`` but is not
342 returned. At least one message is guaranteed whenever the result
343 is empty.
344 collection_types : `~collections.abc.Set` [ `CollectionType` ], \
345 optional
346 Collection types to consider when resolving the collection
347 expression.
348 allow_calibration_collections : `bool`, optional
349 If `False`, skip (with a ``rejections`` message) any calibration
350 collections that match ``collections`` but are not given explicitly by
351 name, and raise `NotImplementedError` for any calibration
352 collection that is given explicitly. This is a temporary option
353 that will be removed when the query system can handle temporal
354 joins involving calibration collections.
356 Returns
357 -------
358 records : `list` [ `CollectionRecord` ]
359 A new list of `CollectionRecord` instances, for collections that
360 both match ``collections`` and may have datasets of the given type.
362 Notes
363 -----
364 This is a higher-level driver for `resolve_collection_wildcard` and
365 `filter_dataset_collections` that is mostly concerned with handling
366 queries against `~CollectionType.CALIBRATION` collections that aren't
367 fully supported yet. Once that support improves, this method may be
368 removed.
369 """
370 if collections == CollectionWildcard() and collection_types == CollectionType.all():
371 collection_types = {CollectionType.RUN}
372 explicit_collections = frozenset(collections.strings)
373 matching_collection_records = self.resolve_collection_wildcard(
374 collections, collection_types=collection_types
375 )
376 ((_, filtered_collection_records),) = self.filter_dataset_collections(
377 [dataset_type],
378 matching_collection_records,
379 governor_constraints=governor_constraints,
380 rejections=rejections,
381 ).items()
382 if not allow_calibration_collections:
383 supported_collection_records: list[CollectionRecord] = []
384 for record in filtered_collection_records:
385 if record.type is CollectionType.CALIBRATION:
386 # If collection name was provided explicitly then raise,
387 # since this is a kind of query we don't support yet;
388 # otherwise collection is a part of a chained one or regex
389 # match, and we skip it to not break queries of other
390 # included collections.
391 if record.name in explicit_collections:
392 raise NotImplementedError(
393 f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
394 f"collection {record.name!r} is not yet supported."
395 )
396 else:
397 if rejections is not None:
398 rejections.append(
399 f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
400 f"collection {record.name!r} because calibration queries aren't fully "
401 "implemented; this is not an error only because the query structure "
402 "implies that searching this collection may be incidental."
403 )
404 supported_collection_records.append(record)
405 else:
406 supported_collection_records.append(record)
407 else:
408 supported_collection_records = filtered_collection_records
409 if not supported_collection_records and rejections is not None and not rejections:
410 rejections.append(f"No collections to search matching expression {collections!r}.")
411 return supported_collection_records
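# A hedged usage sketch combining the wildcard resolution and filtering
# above; the collection name, governor constraint, and error handling are
# illustrative only.
def _example_resolve_dataset_collections(
    backend: QueryBackend, dataset_type: DatasetType
) -> list[CollectionRecord]:
    rejections: list[str] = []
    records = backend.resolve_dataset_collections(
        dataset_type,
        CollectionWildcard.from_expression("HSC/defaults"),
        governor_constraints={"instrument": {"HSC"}},
        rejections=rejections,
    )
    if not records:
        # At least one rejection message is guaranteed when the result is
        # empty, so there is always something to report.
        raise LookupError("; ".join(rejections))
    return records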
413 @abstractmethod
414 def make_dataset_query_relation(
415 self,
416 dataset_type: DatasetType,
417 collections: Sequence[CollectionRecord],
418 columns: Set[str],
419 context: _C,
420 ) -> Relation:
421 """Construct a relation that represents an unordered query for datasets
422 that returns matching results from all given collections.
424 Parameters
425 ----------
426 dataset_type : `DatasetType`
427 Type for the datasets being queried.
428 collections : `Sequence` [ `CollectionRecord` ]
429 Records for collections to query. Should generally be the result
430 of a call to `resolve_dataset_collections`, and must not be empty.
431 context : `QueryContext`
432 Context that manages per-query state.
433 columns : `~collections.abc.Set` [ `str` ]
434 Columns to include in the relation. See `Query.find_datasets` for
435 details.
436 Returns
437 -------
438 relation : `lsst.daf.relation.Relation`
439 Relation representing a dataset query.
440 """
441 raise NotImplementedError()
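# A hedged usage sketch: build an unordered dataset query relation inside an
# open query context.  ``backend``, ``dataset_type``, and ``records`` are
# hypothetical, and the column names assume "dataset_id" and "run" are among
# the dataset columns supported by the concrete backend.
def _example_dataset_query_relation(
    backend: QueryBackend,
    dataset_type: DatasetType,
    records: Sequence[CollectionRecord],
) -> Relation:
    with backend.context() as ctx:
        return backend.make_dataset_query_relation(
            dataset_type, records, {"dataset_id", "run"}, context=ctx
        )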
443 def make_dataset_search_relation(
444 self,
445 dataset_type: DatasetType,
446 collections: Sequence[CollectionRecord],
447 columns: Set[str],
448 context: _C,
449 *,
450 join_to: Relation | None = None,
451 ) -> Relation:
452 """Construct a relation that represents an order query for datasets
453 that returns results from the first matching collection for each
454 data ID.
456 Parameters
457 ----------
458 dataset_type : `DatasetType`
459 Type for the datasets being searched.
460 collections : `Sequence` [ `CollectionRecord` ]
461 Records for collections to search. Should generally be the result
462 of a call to `resolve_dataset_collections`, and must not be empty.
463 columns : `~collections.abc.Set` [ `str` ]
464 Columns to include in the relation. See
465 `make_dataset_query_relation` for options.
466 context : `QueryContext`
467 Context that manages per-query state.
468 join_to : `Relation`, optional
469 Another relation to join with the query for datasets in all
470 collections before filtering out shadowed datasets.
472 Returns
473 -------
474 relation : `lsst.daf.relation.Relation`
475 Relation representing a find-first dataset search.
476 """
477 base = self.make_dataset_query_relation(
478 dataset_type,
479 collections,
480 columns | {"rank"},
481 context=context,
482 )
483 if join_to is not None:
484 base = join_to.join(base)
485 # Query-simplification shortcut: if there is only one collection, a
486 # find-first search is just a regular result subquery. Same if there
487 # are no collections.
488 if len(collections) <= 1:
489 return base
490 # We filter the dimension keys in the given relation through
491 # DimensionGraph.required.names to minimize the set we partition on
492 # and order it in a more index-friendly way. More precisely, any
493 # index we define on dimensions will be consistent with this order, but
494 # any particular index may not have the same dimension columns.
495 dimensions = self.universe.extract(
496 [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
497 )
498 find_first = FindFirstDataset(
499 dimensions=DimensionKeyColumnTag.generate(dimensions.required.names),
500 rank=DatasetColumnTag(dataset_type.name, "rank"),
501 )
502 return find_first.apply(base).with_only_columns(base.columns - {find_first.rank})
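# A minimal pure-Python sketch (illustrative only, not the relation-engine
# implementation) of the find-first semantics applied above: within each
# group of rows sharing the same dimension key, keep the row whose "rank"
# (the index of its collection in the ordered search path) is smallest.
def _find_first_by_rank_sketch(
    rows: Iterable[Mapping[str, Any]], key_columns: Sequence[str]
) -> list[Mapping[str, Any]]:
    best: dict[tuple[Any, ...], Mapping[str, Any]] = {}
    for row in rows:
        key = tuple(row[column] for column in key_columns)
        if key not in best or row["rank"] < best[key]["rank"]:
            best[key] = row
    return list(best.values())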
504 def make_doomed_dataset_relation(
505 self,
506 dataset_type: DatasetType,
507 columns: Set[str],
508 messages: Iterable[str],
509 context: _C,
510 ) -> Relation:
511 """Construct a relation that represents a doomed query for datasets.
513 Parameters
514 ----------
515 dataset_type : `DatasetType`
516 Dataset type being queried.
517 columns : `~collections.abc.Set` [ `str` ]
518 Dataset columns to include (dimension key columns are always
519 included). See `make_dataset_query_relation` for allowed values.
520 messages : `Iterable` [ `str` ]
521 Diagnostic messages that explain why the query is doomed to yield
522 no rows.
523 context : `QueryContext`
524 Context that manages per-query state.
526 Returns
527 -------
528 relation : `lsst.daf.relation.Relation`
529 Relation with the requested columns and no rows.
530 """
531 column_tags: set[ColumnTag] = set(
532 DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
533 )
534 column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
535 return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
537 @abstractmethod
538 def make_dimension_relation(
539 self,
540 dimensions: DimensionGraph,
541 columns: Set[ColumnTag],
542 context: _C,
543 *,
544 initial_relation: Relation | None = None,
545 initial_join_max_columns: frozenset[ColumnTag] | None = None,
546 initial_dimension_relationships: Set[frozenset[str]] | None = None,
547 spatial_joins: Iterable[tuple[str, str]] = (),
548 governor_constraints: Mapping[str, Set[str]],
549 ) -> Relation:
550 """Construct a relation that provides columns and constraints from
551 dimension records.
553 Parameters
554 ----------
555 dimensions : `DimensionGraph`
556 Dimensions to include. The key columns for all dimensions (both
557 required and implied) will be included in the returned relation.
558 columns : `~collections.abc.Set` [ `ColumnTag` ]
559 Dimension record columns to include. This set may include key
560 column tags as well, though these may be ignored; the set of key
561 columns to include is determined by the ``dimensions`` argument
562 instead.
563 context : `QueryContext`
564 Context that manages per-query state.
565 initial_relation : `~lsst.daf.relation.Relation`, optional
566 Initial relation to join to the dimension relations. If this
567 relation provides record columns, key columns, and relationships
568 between key columns (see ``initial_dimension_relationships`` below)
569 that would otherwise have been added by joining in a dimension
570 element's relation, that relation may not be joined in at all.
571 initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
572 Maximum superset of common columns for joins to
573 ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
574 ``JOIN`` clauses). If provided, this is a subset of the dimension
575 key columns in ``initial_relation``, which are otherwise all
576 considered as potential common columns for joins. Ignored if
577 ``initial_relation`` is not provided.
578 initial_dimension_relationships : `~collections.abc.Set` [ `frozenset`
579 [ `str` ] ], optional
580 A set of sets of dimension names representing relationships between
581 dimensions encoded in the rows of ``initial_relation``. If not
582 provided (and ``initial_relation`` is),
583 `extract_dimension_relationships` will be called on
584 ``initial_relation``.
585 spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
586 Iterable of dimension element name pairs that should be spatially
587 joined.
588 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
590 Constraints on governor dimensions that are provided by other parts
591 of the query that either have been included in ``initial_relation``
592 or are guaranteed to be added in the future. This is a mapping from
593 governor dimension name to sets of values that dimension may take.
595 Returns
596 -------
597 relation : `lsst.daf.relation.Relation`
598 Relation containing the given dimension columns and constraints.
599 """
600 raise NotImplementedError()
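# A hedged usage sketch for ``make_dimension_relation``: join the dimension
# tables needed for a visit/detector/patch query with a spatial join between
# visit-detector regions and patches.  The dimension and element names come
# from the standard daf_butler universe, but the governor values and the
# open ``context`` are hypothetical.
def _example_dimension_relation(
    backend: QueryBackend, context: QueryContext
) -> Relation:
    dimensions = backend.universe.extract(["visit", "detector", "patch"])
    return backend.make_dimension_relation(
        dimensions,
        columns=set(),
        context=context,
        spatial_joins=[("visit_detector_region", "patch")],
        governor_constraints={"instrument": {"HSC"}, "skymap": {"hsc_rings_v1"}},
    )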
602 @abstractmethod
603 def resolve_governor_constraints(
604 self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C
605 ) -> Mapping[str, Set[str]]:
606 """Resolve governor dimension constraints provided by user input to
607 a query against the content in the `Registry`.
609 Parameters
610 ----------
611 dimensions : `DimensionGraph`
612 Dimensions that bound the governor dimensions to consider (via
613 ``dimensions.governors``, more specifically).
614 constraints : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
616 Constraints from user input to the query (e.g. from data IDs and
617 string expression predicates).
618 context : `QueryContext`
619 Object that manages state for the query; used here to fetch the
620 governor dimension record cache if it has not already been loaded.
622 Returns
623 -------
624 resolved : `Mapping` [ `str`, `~collections.abc.Set` [ `str` ] ]
626 A shallow copy of ``constraints`` with keys equal to
627 ``dimensions.governors.names`` and value sets constrained by the
628 Registry content if they were not already in ``constraints``.
630 Raises
631 ------
632 DataIdValueError
633 Raised if ``constraints`` includes governor dimension values that
634 are not present in the `Registry`.
635 """
636 raise NotImplementedError()
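# An illustrative pure-Python sketch (not the registry implementation) of the
# contract documented above: constraints are narrowed to the governors
# relevant for the query, unknown values raise, and governors the user did
# not constrain fall back to the full set of values known to the registry.
# ``LookupError`` stands in here for the ``DataIdValueError`` a real
# implementation raises.
def _resolve_governors_sketch(
    governor_names: Iterable[str],
    user_constraints: Mapping[str, Set[str]],
    known_values: Mapping[str, Set[str]],
) -> dict[str, Set[str]]:
    resolved: dict[str, Set[str]] = {}
    for name in governor_names:
        known = known_values.get(name, frozenset())
        if name in user_constraints:
            unknown = set(user_constraints[name]) - set(known)
            if unknown:
                raise LookupError(f"Unknown values for governor {name!r}: {unknown}.")
            resolved[name] = user_constraints[name]
        else:
            resolved[name] = known
    return resolved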
638 @abstractmethod
639 def get_dimension_record_cache(
640 self, element_name: str, context: _C
641 ) -> Mapping[DataCoordinate, DimensionRecord] | None:
642 """Return a local cache of all `DimensionRecord` objects for a
643 dimension element, fetching it if necessary.
645 Parameters
646 ----------
647 element_name : `str`
648 Name of the dimension element.
649 context : `QueryContext`
650 Context to be used to execute queries when no cached result is
651 available.
653 Returns
654 -------
655 cache : `Mapping` [ `DataCoordinate`, `DimensionRecord` ] or `None`
656 Mapping from data ID to dimension record, or `None` if this
657 element's records are never cached.
658 """
659 raise NotImplementedError()
661 def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
662 """Extract the dimension key relationships encoded in a relation tree.
664 Parameters
665 ----------
666 relation : `Relation`
667 Relation tree to process.
669 Returns
670 -------
671 relationships : `set` [ `frozenset` [ `str` ] ]
672 Set of sets of dimension names, where each inner set represents a
673 relationship between dimensions.
675 Notes
676 -----
677 Dimension relationships include both many-to-one implied dependencies
678 and many-to-many joins backed by "always-join" dimension elements, and
679 it's important to join in the dimension table that defines a
680 relationship in any query involving dimensions that are a superset of
681 that relationship. For example, let's consider a relation tree that
682 joins dataset existence-check relations for two dataset types, with
683 dimensions ``{instrument, exposure, detector}`` and ``{instrument,
684 physical_filter}``. The joined relation appears to have all dimension
685 keys in its expanded graph present except ``band``, and the system
686 could easily correct this by joining that dimension in directly. But
687 it's also missing the ``{instrument, exposure, physical_filter}``
688 relationship we'd get from the ``exposure`` dimension's own relation
689 (``exposure`` implies ``physical_filter``) and the similar
690 ``{instrument, physical_filter, band}`` relationship from the
691 ``physical_filter`` dimension relation; we need the relationship logic
692 to recognize that those dimensions need to be joined in as well in
693 order for the full relation to have rows that represent valid data IDs.
695 The implementation of this method relies on the assumption that
696 `LeafRelation` objects always have rows that are consistent with all
697 defined relationships (i.e. are valid data IDs). This is true not just
698 for dimension relations themselves, but for anything created from
699 queries based on them, including datasets and query results. It is
700 possible to construct `LeafRelation` objects that do not satisfy this
701 criterion (e.g. when accepting user-provided data IDs), and in this
702 case higher-level guards or warnings must be provided.
703 """
704 return {
705 frozenset(
706 tag.dimension
707 for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
708 )
709 for leaf_relation in self._extract_leaf_relations(relation).values()
710 }
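# A worked illustration of the Notes above, written out by hand rather than
# produced by running the method: joining dataset relations with dimensions
# {instrument, exposure, detector} and {instrument, physical_filter} yields
# only the first two relationships below, so the ``exposure`` and
# ``physical_filter`` dimension relations must still be joined in to restore
# the last two and keep every row a valid data ID.
_EXAMPLE_LEAF_RELATIONSHIPS = {
    frozenset({"instrument", "exposure", "detector"}),
    frozenset({"instrument", "physical_filter"}),
}
_EXAMPLE_MISSING_RELATIONSHIPS = {
    frozenset({"instrument", "exposure", "physical_filter"}),
    frozenset({"instrument", "physical_filter", "band"}),
}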
712 def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
713 """Recursively extract leaf relations from a relation tree.
715 Parameters
716 ----------
717 relation : `Relation`
718 Tree to process.
720 Returns
721 -------
722 leaves : `dict` [ `str`, `LeafRelation` ]
723 Leaf relations, keyed and deduplicated by name.
724 """
725 match relation:
726 case LeafRelation() as leaf:
727 return {leaf.name: leaf}
728 case UnaryOperationRelation(target=target):
729 return self._extract_leaf_relations(target)
730 case BinaryOperationRelation(lhs=lhs, rhs=rhs):
731 return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
732 case MarkerRelation(target=target):
733 return self._extract_leaf_relations(target)
734 raise AssertionError("Match should be exhaustive and all branches should return.")