Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 29%
105 statements
coverage.py v6.5.0, created at 2023-01-26 02:04 -0800
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ("QueryBackend",)
25from abc import abstractmethod
26from collections.abc import Iterable, Mapping, Sequence, Set
27from typing import TYPE_CHECKING, Any, Generic, TypeVar
29from lsst.daf.relation import (
30 BinaryOperationRelation,
31 ColumnTag,
32 LeafRelation,
33 MarkerRelation,
34 Relation,
35 UnaryOperationRelation,
36)
38from ...core import (
39 DataCoordinate,
40 DatasetColumnTag,
41 DatasetType,
42 DimensionGraph,
43 DimensionKeyColumnTag,
44 DimensionRecord,
45 DimensionUniverse,
46)
47from .._collectionType import CollectionType
48from .._exceptions import DatasetTypeError, MissingDatasetTypeError
49from ..wildcards import CollectionWildcard
50from ._query_context import QueryContext
51from .find_first_dataset import FindFirstDataset
53if TYPE_CHECKING:
54 from ..interfaces import CollectionRecord
57_C = TypeVar("_C", bound=QueryContext)
60class QueryBackend(Generic[_C]):
61 """An interface for constructing and evaluating the
62 `~lsst.daf.relation.Relation` objects that comprise registry queries.
64 This ABC is expected to have a concrete subclass for each concrete registry
65 type, and most subclasses will be paired with a `QueryContext` subclass.
66 See `QueryContext` for the division of responsibilities between these two
67 interfaces.
68 """
70 @property
71 @abstractmethod
72 def universe(self) -> DimensionUniverse:
73 """Definition of all dimensions and dimension elements for this
74 registry (`DimensionUniverse`).
75 """
76 raise NotImplementedError()
78 def context(self) -> _C:
79 """Return a context manager that can be used to execute queries with
80 this backend.
82 Returns
83 -------
84 context : `QueryContext`
85 Context manager that manages state and connections needed to
86 execute queries.
87 """
88 raise NotImplementedError()
90 @abstractmethod
91 def get_collection_name(self, key: Any) -> str:
92 """Return the collection name associated with a collection primary key
93 value.
95 Parameters
96 ----------
97 key
98 Collection primary key value.
100 Returns
101 -------
102 name : `str`
103 Collection name.
104 """
105 raise NotImplementedError()
107 @abstractmethod
108 def resolve_collection_wildcard(
109 self,
110 expression: Any,
111 *,
112 collection_types: Set[CollectionType] = CollectionType.all(),
113 done: set[str] | None = None,
114 flatten_chains: bool = True,
115 include_chains: bool | None = None,
116 ) -> list[CollectionRecord]:
117 """Return the collection records that match a wildcard expression.
119 Parameters
120 ----------
121 expression
122 Names and/or patterns for collections; will be passed to
123 `CollectionWildcard.from_expression`.
124 collection_types : `collections.abc.Set` [ `CollectionType` ], optional
125 If provided, only yield collections of these types.
126 done : `set` [ `str` ], optional
127 A set of collection names that should be skipped, updated to
128 include all processed collection names on return.
 129 flatten_chains : `bool`, optional
 130 If `True` (default), recursively yield the child collections of
 131 `~CollectionType.CHAINED` collections.
 132 include_chains : `bool`, optional
 133 If `True`, return records for `~CollectionType.CHAINED`
 134 collections themselves. The default is the opposite of
 135 ``flatten_chains``: either return records for CHAINED collections or
 136 their children, but not both.
138 Returns
139 -------
140 records : `list` [ `CollectionRecord` ]
141 Matching collection records.
142 """
143 raise NotImplementedError()
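    # Sketch of a typical call (hypothetical ``backend`` variable and
    # collection name):
    #
    #   records = backend.resolve_collection_wildcard(
    #       "HSC/defaults",
    #       collection_types={CollectionType.RUN, CollectionType.CHAINED},
    #   )
    #
    # With the default ``flatten_chains=True``, any matching CHAINED
    # collections are recursively expanded into their children.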
145 @abstractmethod
146 def resolve_dataset_type_wildcard(
147 self,
148 expression: Any,
149 components: bool | None = None,
150 missing: list[str] | None = None,
151 explicit_only: bool = False,
152 components_deprecated: bool = True,
153 ) -> dict[DatasetType, list[str | None]]:
154 """Return the dataset types that match a wildcard expression.
156 Parameters
157 ----------
158 expression
159 Names and/or patterns for dataset types; will be passed to
160 `DatasetTypeWildcard.from_expression`.
161 components : `bool`, optional
162 If `True`, apply all expression patterns to component dataset type
163 names as well. If `False`, never apply patterns to components. If
164 `None` (default), apply patterns to components only if their parent
165 datasets were not matched by the expression. Fully-specified
166 component datasets (`str` or `DatasetType` instances) are always
167 included.
168 missing : `list` of `str`, optional
169 String dataset type names that were explicitly given (i.e. not
170 regular expression patterns) but not found will be appended to this
171 list, if it is provided.
172 explicit_only : `bool`, optional
173 If `True`, require explicit `DatasetType` instances or `str` names,
174 with `re.Pattern` instances deprecated and ``...`` prohibited.
175 components_deprecated : `bool`, optional
176 If `True`, this is a context in which component dataset support is
177 deprecated. This will result in a deprecation warning when
178 ``components=True`` or ``components=None`` and a component dataset
179 is matched. In the future this will become an error.
181 Returns
182 -------
 183 dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
184 A mapping with resolved dataset types as keys and lists of
185 matched component names as values, where `None` indicates the
186 parent composite dataset type was matched.
187 """
188 raise NotImplementedError()
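    # Sketch of the return value's shape (dataset type and component names are
    # hypothetical): a matched parent is recorded as `None`, while matched
    # components are recorded by name.
    #
    #   {
    #       DatasetType("calexp", ...): [None],
    #       DatasetType("raw", ...): ["wcs", "visitInfo"],
    #   }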
190 def resolve_single_dataset_type_wildcard(
191 self,
192 expression: Any,
193 components: bool | None = None,
194 explicit_only: bool = False,
195 components_deprecated: bool = True,
196 ) -> tuple[DatasetType, list[str | None]]:
197 """Return a single dataset type that matches a wildcard expression.
199 Parameters
200 ----------
201 expression
202 Names and/or patterns for the dataset type; will be passed to
203 `DatasetTypeWildcard.from_expression`.
204 components : `bool`, optional
205 If `True`, apply all expression patterns to component dataset type
206 names as well. If `False`, never apply patterns to components. If
207 `None` (default), apply patterns to components only if their parent
208 datasets were not matched by the expression. Fully-specified
209 component datasets (`str` or `DatasetType` instances) are always
210 included.
211 explicit_only : `bool`, optional
212 If `True`, require explicit `DatasetType` instances or `str` names,
213 with `re.Pattern` instances deprecated and ``...`` prohibited.
214 components_deprecated : `bool`, optional
215 If `True`, this is a context in which component dataset support is
216 deprecated. This will result in a deprecation warning when
217 ``components=True`` or ``components=None`` and a component dataset
218 is matched. In the future this will become an error.
220 Returns
221 -------
222 single_parent : `DatasetType`
223 The matched parent dataset type.
224 single_components : `list` [ `str` | `None` ]
225 The matched components that correspond to this parent, or `None` if
226 the parent dataset type itself was matched.
228 Notes
229 -----
230 This method really finds a single parent dataset type and any number of
 231 components, because it's only the parent dataset type that's known to
 232 the registry at all; many callers are expected to discard the
233 ``single_components`` return value.
234 """
235 missing: list[str] = []
236 matching = self.resolve_dataset_type_wildcard(
237 expression,
238 components=components,
239 missing=missing,
240 explicit_only=explicit_only,
241 components_deprecated=components_deprecated,
242 )
243 if not matching:
244 if missing:
245 raise MissingDatasetTypeError(
246 "\n".join(
247 f"Dataset type {t!r} is not registered, so no instances of it can exist."
248 for t in missing
249 )
250 )
251 else:
252 raise MissingDatasetTypeError(
253 f"No registered dataset types matched expression {expression!r}, "
254 "so no datasets will be found."
255 )
256 if len(matching) > 1:
257 raise DatasetTypeError(
258 f"Expression {expression!r} matched multiple parent dataset types: "
259 f"{[t.name for t in matching]}, but only one is allowed."
260 )
261 ((single_parent, single_components),) = matching.items()
262 if missing:
263 raise DatasetTypeError(
264 f"Expression {expression!r} appears to involve multiple dataset types, even though only "
265 f"one ({single_parent.name}) is registered, and only one is allowed here."
266 )
267 return single_parent, single_components
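    # Usage sketch (hypothetical ``backend`` variable and dataset type name):
    # as noted above, many callers only need the parent dataset type and
    # discard the matched components.
    #
    #   parent, _ = backend.resolve_single_dataset_type_wildcard("calexp")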
269 @abstractmethod
270 def filter_dataset_collections(
271 self,
272 dataset_types: Iterable[DatasetType],
273 collections: Sequence[CollectionRecord],
274 *,
275 governor_constraints: Mapping[str, Set[str]],
276 rejections: list[str] | None = None,
277 ) -> dict[DatasetType, list[CollectionRecord]]:
278 """Filter a sequence of collections to those for which a dataset query
279 might succeed.
281 Parameters
282 ----------
283 dataset_types : `Iterable` [ `DatasetType` ]
284 Dataset types that are being queried. Must include only parent
285 or standalone dataset types, not components.
286 collections : `Sequence` [ `CollectionRecord` ]
287 Sequence of collections that will be searched.
 288 governor_constraints : `Mapping` [ `str`, \
 289 `~collections.abc.Set` [ `str` ] ]
290 Constraints imposed by other aspects of the query on governor
291 dimensions; collections inconsistent with these constraints will be
292 skipped.
 293 rejections : `list` [ `str` ], optional
 294 If not `None`, a `list` to which diagnostic messages will be
 295 appended for any collection that matches ``collections`` but is not
 296 returned. At least one message is guaranteed whenever the result
 297 is empty.
299 Returns
300 -------
301 dataset_collections : `dict` [ `DatasetType`, \
302 `list` [ `CollectionRecord` ] ]
303 The collections to search for each dataset. The dictionary's keys
304 are always exactly ``dataset_types`` (in the same order), and each
305 nested `list` of collections is ordered consistently with the
306 given ``collections``.
308 Notes
309 -----
310 This method accepts multiple dataset types and multiple collections at
311 once to enable implementations to batch up the fetching of summary
312 information needed to relate them.
313 """
314 raise NotImplementedError()
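    # Sketch of the ``governor_constraints`` argument (values are
    # illustrative): a mapping from governor dimension name to the values that
    # dimension may take, e.g.
    #
    #   {"instrument": {"HSC"}, "skymap": {"hsc_rings_v1"}}
    #
    # Collections whose dataset summaries are inconsistent with these values
    # are dropped from the returned lists.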
316 def resolve_dataset_collections(
317 self,
318 dataset_type: DatasetType,
319 collections: CollectionWildcard,
320 *,
321 governor_constraints: Mapping[str, Set[str]],
322 rejections: list[str] | None = None,
323 collection_types: Set[CollectionType] = CollectionType.all(),
324 allow_calibration_collections: bool = False,
325 ) -> list[CollectionRecord]:
326 """Resolve the sequence of collections to query for a dataset type.
328 Parameters
329 ----------
330 dataset_type : `DatasetType`
331 Dataset type to be queried in the returned collections.
332 collections : `CollectionWildcard`
333 Expression for the collections to be queried.
 334 governor_constraints : `Mapping` [ `str`, \
 335 `~collections.abc.Set` [ `str` ] ]
336 Constraints imposed by other aspects of the query on governor
337 dimensions; collections inconsistent with these constraints will be
338 skipped.
 339 rejections : `list` [ `str` ], optional
 340 If not `None`, a `list` to which diagnostic messages will be
 341 appended for any collection that matches ``collections`` but is not
 342 returned. At least one message is guaranteed whenever the result
 343 is empty.
344 collection_types : `~collections.abc.Set` [ `CollectionType` ], \
345 optional
346 Collection types to consider when resolving the collection
347 expression.
348 allow_calibration_collections : `bool`, optional
 349 If `False`, skip (with a ``rejections`` message) any calibration
 350 collections that match ``collections`` but are not given explicitly by
351 name, and raise `NotImplementedError` for any calibration
352 collection that is given explicitly. This is a temporary option
353 that will be removed when the query system can handle temporal
354 joins involving calibration collections.
356 Returns
357 -------
358 records : `list` [ `CollectionRecord` ]
359 A new list of `CollectionRecord` instances, for collections that
360 both match ``collections`` and may have datasets of the given type.
362 Notes
363 -----
364 This is a higher-level driver for `resolve_collection_wildcard` and
365 `filter_dataset_collections` that is mostly concerned with handling
 366 queries against `~CollectionType.CALIBRATION` collections that aren't
367 fully supported yet. Once that support improves, this method may be
368 removed.
369 """
370 if collections == CollectionWildcard() and collection_types == CollectionType.all():
371 collection_types = {CollectionType.RUN}
372 explicit_collections = frozenset(collections.strings)
373 matching_collection_records = self.resolve_collection_wildcard(
374 collections, collection_types=collection_types
375 )
376 ((_, filtered_collection_records),) = self.filter_dataset_collections(
377 [dataset_type],
378 matching_collection_records,
379 governor_constraints=governor_constraints,
380 rejections=rejections,
381 ).items()
382 if not allow_calibration_collections:
383 supported_collection_records: list[CollectionRecord] = []
384 for record in filtered_collection_records:
385 if record.type is CollectionType.CALIBRATION:
 386 # If the collection name was provided explicitly then raise,
 387 # since this is a kind of query we don't support yet; otherwise
 388 # the collection came from a chained collection or a regex
 389 # match, and we skip it so as not to break queries of the other
 390 # included collections.
391 if record.name in explicit_collections:
392 raise NotImplementedError(
393 f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
394 f"collection {record.name!r} is not yet supported."
395 )
396 else:
397 if rejections is not None:
398 rejections.append(
399 f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
400 f"collection {record.name!r} because calibration queries aren't fully "
401 "implemented; this is not an error only because the query structure "
402 "implies that searching this collection may be incidental."
403 )
404 supported_collection_records.append(record)
405 else:
406 supported_collection_records.append(record)
407 else:
408 supported_collection_records = filtered_collection_records
409 if not supported_collection_records and rejections is not None and not rejections:
410 rejections.append(f"No collections to search matching expression {collections!r}.")
411 return supported_collection_records
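    # Usage sketch (hypothetical names): callers typically pass a
    # ``rejections`` list so that an empty result can be explained.
    #
    #   rejections: list[str] = []
    #   records = backend.resolve_dataset_collections(
    #       dataset_type,
    #       CollectionWildcard.from_expression(...),
    #       governor_constraints={},
    #       rejections=rejections,
    #   )
    #   if not records:
    #       ...  # report "\n".join(rejections)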
413 @abstractmethod
414 def make_dataset_query_relation(
415 self,
416 dataset_type: DatasetType,
417 collections: Sequence[CollectionRecord],
418 columns: Set[str],
419 context: _C,
420 ) -> Relation:
421 """Construct a relation that represents an unordered query for datasets
422 that returns matching results from all given collections.
424 Parameters
425 ----------
426 dataset_type : `DatasetType`
427 Type for the datasets being queried.
428 collections : `Sequence` [ `CollectionRecord` ]
429 Records for collections to query. Should generally be the result
430 of a call to `resolve_dataset_collections`, and must not be empty.
431 context : `QueryContext`
432 Context that manages per-query state.
433 columns : `~collections.abc.Set` [ `str` ]
434 Columns to include in the relation. See `Query.find_datasets` for
435 details.
 436 Returns
437 -------
438 relation : `lsst.daf.relation.Relation`
439 Relation representing a dataset query.
440 """
441 raise NotImplementedError()
443 def make_dataset_search_relation(
444 self,
445 dataset_type: DatasetType,
446 collections: Sequence[CollectionRecord],
447 columns: Set[str],
448 context: _C,
449 *,
450 join_to: Relation | None = None,
451 ) -> Relation:
452 """Construct a relation that represents an order query for datasets
453 that returns results from the first matching collection for each
454 data ID.
456 Parameters
457 ----------
458 dataset_type : `DatasetType`
 459 Type for the datasets being searched.
460 collections : `Sequence` [ `CollectionRecord` ]
461 Records for collections to search. Should generally be the result
462 of a call to `resolve_dataset_collections`, and must not be empty.
463 columns : `~collections.abc.Set` [ `str` ]
 464 Columns to include in the relation. See
465 `make_dataset_query_relation` for options.
466 context : `QueryContext`
467 Context that manages per-query state.
468 join_to : `Relation`, optional
469 Another relation to join with the query for datasets in all
 470 collections before filtering out shadowed datasets.
 472 Returns
473 -------
474 relation : `lsst.daf.relation.Relation`
475 Relation representing a find-first dataset search.
476 """
477 base = self.make_dataset_query_relation(
478 dataset_type,
479 collections,
480 columns | {"rank"},
481 context=context,
482 )
483 if join_to is not None:
484 base = join_to.join(base)
485 # Query-simplification shortcut: if there is only one collection, a
486 # find-first search is just a regular result subquery. Same if there
487 # are no collections.
488 if len(collections) <= 1:
489 return base
490 # We filter the dimension keys in the given relation through
491 # DimensionGraph.required.names to minimize the set we partition on
492 # and order it in a more index-friendly way. More precisely, any
493 # index we define on dimensions will be consistent with this order, but
494 # any particular index may not have the same dimension columns.
495 dimensions = self.universe.extract(
496 [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
497 )
498 find_first = FindFirstDataset(
499 dimensions=DimensionKeyColumnTag.generate(dimensions.required.names),
500 rank=DatasetColumnTag(dataset_type.name, "rank"),
501 )
502 return find_first.apply(
503 base, preferred_engine=context.preferred_engine, require_preferred_engine=True
504 ).with_only_columns(base.columns - {find_first.rank})
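    # Note on the method above: the extra "rank" column requested from
    # `make_dataset_query_relation` encodes each row's position in the
    # ``collections`` search path, and `FindFirstDataset` keeps only the
    # lowest-ranked row for each set of dimension keys before the rank column
    # is projected away again.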
506 def make_doomed_dataset_relation(
507 self,
508 dataset_type: DatasetType,
509 columns: Set[str],
510 messages: Iterable[str],
511 context: _C,
512 ) -> Relation:
513 """Construct a relation that represents a doomed query for datasets.
515 Parameters
516 ----------
517 dataset_type : `DatasetType`
518 Dataset type being queried.
 519 columns : `~collections.abc.Set` [ `str` ]
520 Dataset columns to include (dimension key columns are always
521 included). See `make_dataset_query_relation` for allowed values.
522 messages : `Iterable` [ `str` ]
523 Diagnostic messages that explain why the query is doomed to yield
524 no rows.
525 context : `QueryContext`
526 Context that manages per-query state.
 528 Returns
529 -------
530 relation : `lsst.daf.relation.Relation`
531 Relation with the requested columns and no rows.
532 """
533 column_tags: set[ColumnTag] = set(
534 DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
535 )
536 column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
537 return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
539 @abstractmethod
540 def make_dimension_relation(
541 self,
542 dimensions: DimensionGraph,
543 columns: Set[ColumnTag],
544 context: _C,
545 *,
546 initial_relation: Relation | None = None,
547 initial_join_max_columns: frozenset[ColumnTag] | None = None,
548 initial_dimension_relationships: Set[frozenset[str]] | None = None,
549 spatial_joins: Iterable[tuple[str, str]] = (),
550 governor_constraints: Mapping[str, Set[str]],
551 ) -> Relation:
552 """Construct a relation that provides columns and constraints from
553 dimension records.
555 Parameters
556 ----------
557 dimensions : `DimensionGraph`
558 Dimensions to include. The key columns for all dimensions (both
559 required and implied) will be included in the returned relation.
560 columns : `~collections.abc.Set` [ `ColumnTag` ]
561 Dimension record columns to include. This set may include key
562 column tags as well, though these may be ignored; the set of key
563 columns to include is determined by the ``dimensions`` argument
564 instead.
565 context : `QueryContext`
566 Context that manages per-query state.
567 initial_relation : `~lsst.daf.relation.Relation`, optional
568 Initial relation to join to the dimension relations. If this
569 relation provides record columns, key columns, and relationships
570 between key columns (see ``initial_dimension_relationships`` below)
571 that would otherwise have been added by joining in a dimension
572 element's relation, that relation may not be joined in at all.
573 initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
574 Maximum superset of common columns for joins to
575 ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
576 ``JOIN`` clauses). If provided, this is a subset of the dimension
577 key columns in ``initial_relation``, which are otherwise all
578 considered as potential common columns for joins. Ignored if
579 ``initial_relation`` is not provided.
580 initial_dimension_relationships : `~collections.abc.Set` [ `frozenset`
581 [ `str` ] ], optional
582 A set of sets of dimension names representing relationships between
583 dimensions encoded in the rows of ``initial_relation``. If not
584 provided (and ``initial_relation`` is),
585 `extract_dimension_relationships` will be called on
586 ``initial_relation``.
587 spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
588 Iterable of dimension element name pairs that should be spatially
589 joined.
 590 governor_constraints : `Mapping` [ `str`, `~collections.abc.Set`
 591 [ `str` ] ]
592 Constraints on governor dimensions that are provided by other parts
593 of the query that either have been included in ``initial_relation``
594 or are guaranteed to be added in the future. This is a mapping from
595 governor dimension name to sets of values that dimension may take.
 597 Returns
598 -------
599 relation : `lsst.daf.relation.Relation`
600 Relation containing the given dimension columns and constraints.
601 """
602 raise NotImplementedError()
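    # Sketch of the ``spatial_joins`` argument (element names assume the
    # default dimension universe): each pair names two spatial dimension
    # elements whose regions should be overlap-joined, e.g.
    #
    #   spatial_joins=[("visit_detector_region", "patch")]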
604 @abstractmethod
605 def resolve_governor_constraints(
606 self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C
607 ) -> Mapping[str, Set[str]]:
608 """Resolve governor dimension constraints provided by user input to
609 a query against the content in the `Registry`.
611 Parameters
612 ----------
613 dimensions : `DimensionGraph`
614 Dimensions that bound the governor dimensions to consider (via
615 ``dimensions.governors``, more specifically).
 616 constraints : `Mapping` [ `str`, `~collections.abc.Set`
 617 [ `str` ] ]
618 Constraints from user input to the query (e.g. from data IDs and
619 string expression predicates).
620 context : `QueryContext`
621 Object that manages state for the query; used here to fetch the
622 governor dimension record cache if it has not already been loaded.
624 Returns
625 -------
 626 resolved : `Mapping` [ `str`, `~collections.abc.Set`
 627 [ `str` ] ]
 628 A shallow copy of ``constraints`` with keys equal to
 629 ``dimensions.governors.names`` and value sets constrained by the
630 Registry content if they were not already in ``constraints``.
632 Raises
633 ------
634 DataIdValueError
635 Raised if ``constraints`` includes governor dimension values that
636 are not present in the `Registry`.
637 """
638 raise NotImplementedError()
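    # Sketch of inputs and outputs (values illustrative; assumes ``dims``
    # involves both the instrument and skymap governors): user-provided
    # constraints pass through unchanged, while governors that appear in
    # ``dimensions`` but not in ``constraints`` are filled in from the
    # registry's dimension record cache.
    #
    #   resolve_governor_constraints(dims, {"instrument": {"HSC"}}, context)
    #   # -> {"instrument": {"HSC"}, "skymap": {...all skymaps in the registry}}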
640 @abstractmethod
641 def get_dimension_record_cache(
642 self, element_name: str, context: _C
643 ) -> Mapping[DataCoordinate, DimensionRecord] | None:
644 """Return a local cache of all `DimensionRecord` objects for a
645 dimension element, fetching it if necessary.
647 Parameters
648 ----------
649 element_name : `str`
650 Name of the dimension element.
 651 context : `QueryContext`
652 Context to be used to execute queries when no cached result is
653 available.
655 Returns
656 -------
657 cache : `Mapping` [ `DataCoordinate`, `DimensionRecord` ] or `None`
658 Mapping from data ID to dimension record, or `None` if this
659 element's records are never cached.
660 """
661 raise NotImplementedError()
663 def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
664 """Extract the dimension key relationships encoded in a relation tree.
666 Parameters
667 ----------
668 relation : `Relation`
669 Relation tree to process.
671 Returns
672 -------
673 relationships : `set` [ `frozenset` [ `str` ] ]
674 Set of sets of dimension names, where each inner set represents a
675 relationship between dimensions.
677 Notes
678 -----
679 Dimension relationships include both many-to-one implied dependencies
680 and many-to-many joins backed by "always-join" dimension elements, and
681 it's important to join in the dimension table that defines a
682 relationship in any query involving dimensions that are a superset of
683 that relationship. For example, let's consider a relation tree that
684 joins dataset existence-check relations for two dataset types, with
685 dimensions ``{instrument, exposure, detector}`` and ``{instrument,
686 physical_filter}``. The joined relation appears to have all dimension
687 keys in its expanded graph present except ``band``, and the system
688 could easily correct this by joining that dimension in directly. But
689 it's also missing the ``{instrument, exposure, physical_filter}``
690 relationship we'd get from the ``exposure`` dimension's own relation
 691 (``exposure`` implies ``physical_filter``) and the similar
692 ``{instrument, physical_filter, band}`` relationship from the
693 ``physical_filter`` dimension relation; we need the relationship logic
694 to recognize that those dimensions need to be joined in as well in
695 order for the full relation to have rows that represent valid data IDs.
 697 The implementation of this method relies on the assumption that
 698 `LeafRelation` objects always have rows that are consistent with all
 699 defined relationships (i.e. are valid data IDs). This is true not
 700 just for dimension relations themselves, but for anything created from
 701 queries based on them, including datasets and query results. It is
 702 possible to construct `LeafRelation` objects that do not satisfy this
 703 criterion (e.g. when accepting user-provided data IDs), and in such
 704 cases higher-level guards or warnings must be provided.
705 """
706 return {
707 frozenset(
708 tag.dimension
709 for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
710 )
711 for leaf_relation in self._extract_leaf_relations(relation).values()
712 }
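    # Illustration using the example from the Notes above: for a tree that
    # joins two dataset leaves with dimensions {instrument, exposure,
    # detector} and {instrument, physical_filter}, the returned relationships
    # would be
    #
    #   {
    #       frozenset({"instrument", "exposure", "detector"}),
    #       frozenset({"instrument", "physical_filter"}),
    #   }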
714 def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
715 """Recursively extract leaf relations from a relation tree.
717 Parameters
718 ----------
719 relation : `Relation`
720 Tree to process.
722 Returns
723 -------
724 leaves : `dict` [ `str`, `LeafRelation` ]
725 Leaf relations, keyed and deduplicated by name.
726 """
727 match relation:
728 case LeafRelation() as leaf:
729 return {leaf.name: leaf}
730 case UnaryOperationRelation(target=target):
731 return self._extract_leaf_relations(target)
732 case BinaryOperationRelation(lhs=lhs, rhs=rhs):
733 return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
734 case MarkerRelation(target=target):
735 return self._extract_leaf_relations(target)
736 raise AssertionError("Match should be exhaustive and all branches should return.")