Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 40%
113 statements
coverage.py v7.5.0, created at 2024-04-27 03:00 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import _timespan
31__all__ = ("QueryBackend",)
33from abc import abstractmethod
34from collections.abc import Iterable, Mapping, Sequence, Set
35from contextlib import AbstractContextManager
36from typing import TYPE_CHECKING, Any, Generic, TypeVar
38from lsst.daf.relation import (
39 BinaryOperationRelation,
40 ColumnExpression,
41 ColumnTag,
42 LeafRelation,
43 MarkerRelation,
44 Predicate,
45 Relation,
46 UnaryOperationRelation,
47)
49from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
50from ..._dataset_type import DatasetType
51from ..._exceptions import MissingDatasetTypeError
52from ..._exceptions_legacy import DatasetTypeError
53from ...dimensions import DimensionGroup, DimensionRecordSet, DimensionUniverse
54from .._collection_type import CollectionType
55from ..wildcards import CollectionWildcard
56from ._query_context import QueryContext
57from .find_first_dataset import FindFirstDataset
59if TYPE_CHECKING:
60 from ..interfaces import CollectionRecord
63_C = TypeVar("_C", bound=QueryContext)
66class QueryBackend(Generic[_C]):
67 """An interface for constructing and evaluating the
68 `~lsst.daf.relation.Relation` objects that comprise registry queries.
70 This ABC is expected to have a concrete subclass for each concrete registry
71 type, and most subclasses will be paired with a `QueryContext` subclass.
72 See `QueryContext` for the division of responsibilities between these two
73 interfaces.
74 """
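# Sketch (hypothetical subclass names, not part of this module): concrete
# registries are expected to pair a QueryBackend subclass with a matching
# QueryContext subclass through the _C type parameter, e.g.
#
#     class MyQueryContext(QueryContext):
#         ...
#
#     class MyQueryBackend(QueryBackend["MyQueryContext"]):
#         def context(self) -> MyQueryContext:
#             ...
#
# so that relations built by the backend are evaluated by a context of the
# matching type.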
76 @property
77 @abstractmethod
78 def universe(self) -> DimensionUniverse:
79 """Definition of all dimensions and dimension elements for this
80 registry (`DimensionUniverse`).
81 """
82 raise NotImplementedError()
84 @abstractmethod
85 def caching_context(self) -> AbstractContextManager[None]:
86 """Enable caching of collection records and summaries for the duration
87 of the returned context manager.
88 """
89 raise NotImplementedError()
91 def context(self) -> _C:
92 """Return a context manager that can be used to execute queries with
93 this backend.
95 Returns
96 -------
97 context : `QueryContext`
98 Context manager that manages state and connections needed to
99 execute queries.
100 """
101 raise NotImplementedError()
103 @abstractmethod
104 def get_collection_name(self, key: Any) -> str:
105 """Return the collection name associated with a collection primary key
106 value.
108 Parameters
109 ----------
110 key : `~typing.Any`
111 Collection primary key value.
113 Returns
114 -------
115 name : `str`
116 Collection name.
117 """
118 raise NotImplementedError()
120 @abstractmethod
121 def resolve_collection_wildcard(
122 self,
123 expression: Any,
124 *,
125 collection_types: Set[CollectionType] = CollectionType.all(),
126 done: set[str] | None = None,
127 flatten_chains: bool = True,
128 include_chains: bool | None = None,
129 ) -> list[CollectionRecord]:
130 """Return the collection records that match a wildcard expression.
132 Parameters
133 ----------
134 expression : `~typing.Any`
135 Names and/or patterns for collections; will be passed to
136 `CollectionWildcard.from_expression`.
137 collection_types : `collections.abc.Set` [ `CollectionType` ], optional
138 If provided, only yield collections of these types.
139 done : `set` [ `str` ], optional
140 A set of collection names that should be skipped, updated to
141 include all processed collection names on return.
142 flatten_chains : `bool`, optional
143 If `True` (default) recursively yield the child collections of
144 `~CollectionType.CHAINED` collections.
145 include_chains : `bool`, optional
146 If `True`, return records for `~CollectionType.CHAINED`
147 collections themselves. The default is the opposite of
148 ``flatten_chains``: either return records for CHAINED collections
149 or their children, but not both.
151 Returns
152 -------
153 records : `list` [ `CollectionRecord` ]
154 Matching collection records.
155 """
156 raise NotImplementedError()
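# Usage sketch (hypothetical backend instance and collection names): resolve
# an explicit name plus a regular-expression pattern, restricted to RUN
# collections.
#
#     import re
#
#     records = backend.resolve_collection_wildcard(
#         ["HSC/defaults", re.compile(r"u/someone/.*")],
#         collection_types={CollectionType.RUN},
#     )
#     names = [record.name for record in records]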
158 @abstractmethod
159 def resolve_dataset_type_wildcard(
160 self,
161 expression: Any,
162 missing: list[str] | None = None,
163 explicit_only: bool = False,
164 ) -> list[DatasetType]:
165 """Return the dataset types that match a wildcard expression.
167 Parameters
168 ----------
169 expression : `~typing.Any`
170 Names and/or patterns for dataset types; will be passed to
171 `DatasetTypeWildcard.from_expression`.
172 missing : `list` [ `str` ], optional
173 String dataset type names that were explicitly given (i.e. not
174 regular expression patterns) but not found will be appended to this
175 list, if it is provided.
176 explicit_only : `bool`, optional
177 If `True`, require explicit `DatasetType` instances or `str` names,
178 with `re.Pattern` instances deprecated and ``...`` prohibited.
180 Returns
181 -------
182 dataset_types : `list` [ `DatasetType` ]
183 A list of resolved dataset types.
184 """
185 raise NotImplementedError()
187 def resolve_single_dataset_type_wildcard(
188 self,
189 expression: Any,
190 explicit_only: bool = False,
191 ) -> DatasetType:
192 """Return a single dataset type that matches a wildcard expression.
194 Parameters
195 ----------
196 expression : `~typing.Any`
197 Names and/or patterns for the dataset type; will be passed to
198 `DatasetTypeWildcard.from_expression`.
199 explicit_only : `bool`, optional
200 If `True`, require explicit `DatasetType` instances or `str` names,
201 with `re.Pattern` instances deprecated and ``...`` prohibited.
203 Returns
204 -------
205 single : `DatasetType`
206 The matched dataset type.
207 """
208 missing: list[str] = []
209 matching = self.resolve_dataset_type_wildcard(
210 expression, missing=missing, explicit_only=explicit_only
211 )
212 if not matching:
213 if missing:
214 raise MissingDatasetTypeError(
215 "\n".join(
216 f"Dataset type {t!r} is not registered, so no instances of it can exist."
217 for t in missing
218 )
219 )
220 else:
221 raise MissingDatasetTypeError(
222 f"No registered dataset types matched expression {expression!r}, "
223 "so no datasets will be found."
224 )
225 if len(matching) > 1:
226 raise DatasetTypeError(
227 f"Expression {expression!r} matched multiple parent dataset types: "
228 f"{[t.name for t in matching]}, but only one is allowed."
229 )
230 (single_parent,) = matching
231 if missing:
232 raise DatasetTypeError(
233 f"Expression {expression!r} appears to involve multiple dataset types, even though only "
234 f"one ({single_parent.name}) is registered, and only one is allowed here."
235 )
236 return single_parent
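# Usage sketch (hypothetical names): callers that need exactly one dataset
# type can rely on the error handling above instead of checking counts
# themselves.
#
#     try:
#         flat = backend.resolve_single_dataset_type_wildcard("flat")
#     except MissingDatasetTypeError:
#         ...  # "flat" is not registered
#     except DatasetTypeError:
#         ...  # the expression matched more than one dataset type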
238 @abstractmethod
239 def filter_dataset_collections(
240 self,
241 dataset_types: Iterable[DatasetType],
242 collections: Sequence[CollectionRecord],
243 *,
244 governor_constraints: Mapping[str, Set[str]],
245 rejections: list[str] | None = None,
246 ) -> dict[DatasetType, list[CollectionRecord]]:
247 """Filter a sequence of collections to those for which a dataset query
248 might succeed.
250 Parameters
251 ----------
252 dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
253 Dataset types that are being queried. Must include only parent
254 or standalone dataset types, not components.
255 collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
256 Sequence of collections that will be searched.
257 governor_constraints : `~collections.abc.Mapping` [ `str`, \
258 `~collections.abc.Set` [ `str` ] ]
259 Constraints imposed by other aspects of the query on governor
260 dimensions; collections inconsistent with these constraints will be
261 skipped.
262 rejections : `list` [ `str` ], optional
263 If not `None`, a `list` that diagnostic messages will be appended
264 to, for any collection that matches ``collections`` that is not
265 returned. At least one message is guaranteed whenever the result
266 is empty.
268 Returns
269 -------
270 dataset_collections : `dict` [ `DatasetType`, \
271 `list` [ `CollectionRecord` ] ]
272 The collections to search for each dataset. The dictionary's keys
273 are always exactly ``dataset_types`` (in the same order), and each
274 nested `list` of collections is ordered consistently with the
275 given ``collections``.
277 Notes
278 -----
279 This method accepts multiple dataset types and multiple collections at
280 once to enable implementations to batch up the fetching of summary
281 information needed to relate them.
282 """
283 raise NotImplementedError()
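# Shape sketch (hypothetical inputs): the result maps every queried dataset
# type to the subset of ``collections`` that might actually contain it, in
# the original collection order.
#
#     rejections: list[str] = []
#     by_type = backend.filter_dataset_collections(
#         [raw, calexp],
#         collection_records,
#         governor_constraints={"instrument": {"HSC"}},
#         rejections=rejections,
#     )
#     for dataset_type, records in by_type.items():
#         ...  # skipped collections are explained in ``rejections``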
285 def resolve_dataset_collections(
286 self,
287 dataset_type: DatasetType,
288 collections: CollectionWildcard,
289 *,
290 governor_constraints: Mapping[str, Set[str]],
291 rejections: list[str] | None = None,
292 collection_types: Set[CollectionType] = CollectionType.all(),
293 allow_calibration_collections: bool = False,
294 ) -> list[CollectionRecord]:
295 """Resolve the sequence of collections to query for a dataset type.
297 Parameters
298 ----------
299 dataset_type : `DatasetType`
300 Dataset type to be queried in the returned collections.
301 collections : `CollectionWildcard`
302 Expression for the collections to be queried.
303 governor_constraints : `~collections.abc.Mapping` [ `str`, \
304 `~collections.abc.Set` [ `str` ] ]
305 Constraints imposed by other aspects of the query on governor
306 dimensions; collections inconsistent with these constraints will be
307 skipped.
308 rejections : `list` [ `str` ], optional
309 If not `None`, a `list` that diagnostic messages will be appended
310 to, for any collection that matches ``collections`` that is not
311 returned. At least one message is guaranteed whenever the result
312 is empty.
313 collection_types : `~collections.abc.Set` [ `CollectionType` ], \
314 optional
315 Collection types to consider when resolving the collection
316 expression.
317 allow_calibration_collections : `bool`, optional
318 If `False`, skip (with a ``rejections`` message) any calibration
319 collections that match ``collections`` but are not given explicitly by
320 name, and raise `NotImplementedError` for any calibration
321 collection that is given explicitly. This is a temporary option
322 that will be removed when the query system can handle temporal
323 joins involving calibration collections.
325 Returns
326 -------
327 records : `list` [ `CollectionRecord` ]
328 A new list of `CollectionRecord` instances, for collections that
329 both match ``collections`` and may have datasets of the given type.
331 Notes
332 -----
333 This is a higher-level driver for `resolve_collection_wildcard` and
334 `filter_dataset_collections` that is mostly concerned with handling
335 queries against `~CollectionType.CALIBRATION` collections that aren't
336 fully supported yet. Once that support improves, this method may be
337 removed.
338 """
339 if collections == CollectionWildcard() and collection_types == CollectionType.all():
340 collection_types = {CollectionType.RUN}
341 explicit_collections = frozenset(collections.strings)
342 matching_collection_records = self.resolve_collection_wildcard(
343 collections, collection_types=collection_types
344 )
345 ((_, filtered_collection_records),) = self.filter_dataset_collections(
346 [dataset_type],
347 matching_collection_records,
348 governor_constraints=governor_constraints,
349 rejections=rejections,
350 ).items()
351 if not allow_calibration_collections:
352 supported_collection_records: list[CollectionRecord] = []
353 for record in filtered_collection_records:
354 if record.type is CollectionType.CALIBRATION:
355 # If collection name was provided explicitly then raise,
356 # since this is a kind of query we don't support yet;
357 # otherwise collection is a part of a chained one or regex
358 # match, and we skip it to not break queries of other
359 # included collections.
360 if record.name in explicit_collections:
361 raise NotImplementedError(
362 f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
363 f"collection {record.name!r} is not yet supported."
364 )
365 else:
366 if rejections is not None:
367 rejections.append(
368 f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
369 f"collection {record.name!r} because calibration queries aren't fully "
370 "implemented; this is not an error only because the query structure "
371 "implies that searching this collection may be incidental."
372 )
373 supported_collection_records.append(record)
374 else:
375 supported_collection_records.append(record)
376 else:
377 supported_collection_records = filtered_collection_records
378 if not supported_collection_records and rejections is not None and not rejections:
379 rejections.append(f"No collections to search matching expression {collections!r}.")
380 return supported_collection_records
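# Usage sketch (hypothetical names) tying the steps above together:
#
#     rejections: list[str] = []
#     records = backend.resolve_dataset_collections(
#         calexp,
#         CollectionWildcard.from_expression(["HSC/defaults"]),
#         governor_constraints={"instrument": {"HSC"}},
#         rejections=rejections,
#         allow_calibration_collections=False,
#     )
#     if not records:
#         ...  # ``rejections`` now explains why nothing will be searched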
382 @abstractmethod
383 def _make_dataset_query_relation_impl(
384 self,
385 dataset_type: DatasetType,
386 collections: Sequence[CollectionRecord],
387 columns: Set[str],
388 context: _C,
389 ) -> Relation:
390 """Construct a relation that represents an unordered query for datasets
391 that returns matching results from all given collections.
393 Parameters
394 ----------
395 dataset_type : `DatasetType`
396 Type for the datasets being queried.
397 collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
398 Records for collections to query. Should generally be the result
399 of a call to `resolve_dataset_collections`, and must not be empty.
400 columns : `~collections.abc.Set` [ `str` ]
401 Columns to include in the relation. See `Query.find_datasets` for
402 details.
403 context : `QueryContext`
404 Context that manages per-query state.
406 Returns
407 -------
408 relation : `lsst.daf.relation.Relation`
409 Relation representing a dataset query.
411 Notes
412 -----
413 This method must be implemented by derived classes but is not
414 responsible for joining the resulting relation to an existing relation.
415 """
416 raise NotImplementedError()
418 def make_dataset_query_relation(
419 self,
420 dataset_type: DatasetType,
421 collections: Sequence[CollectionRecord],
422 columns: Set[str],
423 context: _C,
424 *,
425 join_to: Relation | None = None,
426 temporal_join_on: Set[ColumnTag] = frozenset(),
427 ) -> Relation:
428 """Construct a relation that represents an unordered query for datasets
429 that returns matching results from all given collections.
431 Parameters
432 ----------
433 dataset_type : `DatasetType`
434 Type for the datasets being queried.
435 collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
436 Records for collections to query. Should generally be the result
437 of a call to `resolve_dataset_collections`, and must not be empty.
438 columns : `~collections.abc.Set` [ `str` ]
439 Columns to include in the relation. See `Query.find_datasets` for
440 details.
441 context : `QueryContext`
442 Context that manages per-query state.
443 join_to : `Relation`, optional
444 Another relation to join with the query for datasets in all
445 collections.
446 temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
447 Timespan columns in ``join_to`` that calibration dataset timespans
448 must overlap. Must already be present in ``join_to``. Ignored if
449 ``join_to`` is `None` or if there are no calibration collections.
451 Returns
452 -------
453 relation : `lsst.daf.relation.Relation`
454 Relation representing a dataset query.
455 """
456 # If we need to do a temporal join to a calibration collection, we need
457 # to include the timespan column in the base query and prepare the join
458 # predicate.
459 join_predicates: list[Predicate] = []
460 base_timespan_tag: ColumnTag | None = None
461 full_columns: set[str] = set(columns)
462 if (
463 temporal_join_on
464 and join_to is not None
465 and any(r.type is CollectionType.CALIBRATION for r in collections)
466 ):
467 base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
468 rhs = ColumnExpression.reference(base_timespan_tag, dtype=_timespan.Timespan)
469 full_columns.add("timespan")
470 for timespan_tag in temporal_join_on:
471 lhs = ColumnExpression.reference(timespan_tag, dtype=_timespan.Timespan)
472 join_predicates.append(lhs.predicate_method("overlaps", rhs))
473 # Delegate to the concrete QueryBackend subclass to do most of the
474 # work.
475 result = self._make_dataset_query_relation_impl(
476 dataset_type,
477 collections,
478 full_columns,
479 context=context,
480 )
481 if join_to is not None:
482 result = join_to.join(
483 result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
484 )
485 if join_predicates and "timespan" not in columns:
486 # Drop the timespan column we added for the join only if the
487 # timespan wasn't requested in its own right.
488 result = result.with_only_columns(result.columns - {base_timespan_tag})
489 return result
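# Sketch of the temporal-join path above (hypothetical names): when the
# collections include CALIBRATION collections, the caller supplies timespan
# column(s) already present in ``join_to`` and the calibration dataset
# timespans are constrained to overlap them.
#
#     relation = backend.make_dataset_query_relation(
#         bias,
#         calib_records,
#         {"dataset_id", "run"},
#         context,
#         join_to=data_id_relation,
#         temporal_join_on={exposure_timespan_tag},
#     )
#
# ``exposure_timespan_tag`` must already be one of ``data_id_relation``'s
# columns; the dataset timespan column is added and dropped automatically
# unless "timespan" was requested in ``columns``.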
491 def make_dataset_search_relation(
492 self,
493 dataset_type: DatasetType,
494 collections: Sequence[CollectionRecord],
495 columns: Set[str],
496 context: _C,
497 *,
498 join_to: Relation | None = None,
499 temporal_join_on: Set[ColumnTag] = frozenset(),
500 ) -> Relation:
501 """Construct a relation that represents an ordered query for datasets
502 that returns results from the first matching collection for each data
503 ID.
505 Parameters
506 ----------
507 dataset_type : `DatasetType`
508 Type for the datasets being searched.
509 collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
510 Records for collections to search. Should generally be the result
511 of a call to `resolve_dataset_collections`, and must not be empty.
512 columns : `~collections.abc.Set` [ `str` ]
513 Columns to include in the ``relation``. See
514 `make_dataset_query_relation` for options.
515 context : `QueryContext`
516 Context that manages per-query state.
517 join_to : `Relation`, optional
518 Another relation to join with the query for datasets in all
519 collections before filtering out shadowed datasets.
520 temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
521 Timespan columns in ``join_to`` that calibration dataset timespans
522 must overlap. Must already be present in ``join_to``. Ignored if
523 ``join_to`` is `None` or if there are no calibration collections.
525 Returns
526 -------
527 relation : `lsst.daf.relation.Relation`
528 Relation representing a find-first dataset search.
529 """
530 base = self.make_dataset_query_relation(
531 dataset_type,
532 collections,
533 columns | {"rank"},
534 context=context,
535 join_to=join_to,
536 temporal_join_on=temporal_join_on,
537 )
538 # Query-simplification shortcut: if there is only one collection, a
539 # find-first search is just a regular result subquery. Same if there
540 # are no collections.
541 if len(collections) <= 1:
542 return base
543 # We filter the dimension keys in the given relation through
544 # DimensionGroup.required.names to minimize the set we partition on
545 # and order it in a more index-friendly way. More precisely, any
546 # index we define on dimensions will be consistent with this order, but
547 # any particular index may not have the same dimension columns.
548 dimensions = self.universe.conform(
549 [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
550 )
551 find_first = FindFirstDataset(
552 dimensions=DimensionKeyColumnTag.generate(dimensions.required),
553 rank=DatasetColumnTag(dataset_type.name, "rank"),
554 )
555 return find_first.apply(
556 base, preferred_engine=context.preferred_engine, require_preferred_engine=True
557 ).with_only_columns(base.columns - {find_first.rank})
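# Usage sketch (hypothetical names): search an ordered collection list and
# keep only the highest-priority (lowest-rank) match for each data ID.
#
#     relation = backend.make_dataset_search_relation(
#         calexp,
#         records,  # ordered; earlier collections shadow later ones
#         {"dataset_id", "run"},
#         context,
#     )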
559 def make_doomed_dataset_relation(
560 self,
561 dataset_type: DatasetType,
562 columns: Set[str],
563 messages: Iterable[str],
564 context: _C,
565 ) -> Relation:
566 """Construct a relation that represents a doomed query for datasets.
568 Parameters
569 ----------
570 dataset_type : `DatasetType`
571 Dataset type being queried.
572 columns : `~collections.abc.Set` [ `str` ]
573 Dataset columns to include (dimension key columns are always
574 included). See `make_dataset_query_relation` for allowed values.
575 messages : `~collections.abc.Iterable` [ `str` ]
576 Diagnostic messages that explain why the query is doomed to yield
577 no rows.
578 context : `QueryContext`
579 Context that manages per-query state.
581 Returns
582 -------
583 relation : `lsst.daf.relation.Relation`
584 Relation with the requested columns and no rows.
585 """
586 column_tags: set[ColumnTag] = set(
587 DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
588 )
589 column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
590 return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
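# Usage sketch (hypothetical names): when collection filtering proves that a
# query cannot return any rows, the diagnostics are preserved in the relation
# itself.
#
#     relation = backend.make_doomed_dataset_relation(
#         calexp, {"dataset_id", "run"}, rejections, context
#     )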
592 @abstractmethod
593 def make_dimension_relation(
594 self,
595 dimensions: DimensionGroup,
596 columns: Set[ColumnTag],
597 context: _C,
598 *,
599 initial_relation: Relation | None = None,
600 initial_join_max_columns: frozenset[ColumnTag] | None = None,
601 initial_dimension_relationships: Set[frozenset[str]] | None = None,
602 spatial_joins: Iterable[tuple[str, str]] = (),
603 governor_constraints: Mapping[str, Set[str]],
604 ) -> Relation:
605 """Construct a relation that provides columns and constraints from
606 dimension records.
608 Parameters
609 ----------
610 dimensions : `DimensionGroup`
611 Dimensions to include. The key columns for all dimensions (both
612 required and implied) will be included in the returned relation.
613 columns : `~collections.abc.Set` [ `ColumnTag` ]
614 Dimension record columns to include. This set may include key
615 column tags as well, though these may be ignored; the set of key
616 columns to include is determined by the ``dimensions`` argument
617 instead.
618 context : `QueryContext`
619 Context that manages per-query state.
620 initial_relation : `~lsst.daf.relation.Relation`, optional
621 Initial relation to join to the dimension relations. If this
622 relation provides record columns, key columns, and relationships
623 between key columns (see ``initial_dimension_relationships`` below)
624 that would otherwise have been added by joining in a dimension
625 element's relation, that relation may not be joined in at all.
626 initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
627 Maximum superset of common columns for joins to
628 ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
629 ``JOIN`` clauses). If provided, this is a subset of the dimension
630 key columns in ``initial_relation``, which are otherwise all
631 considered as potential common columns for joins. Ignored if
632 ``initial_relation`` is not provided.
633 initial_dimension_relationships : `~collections.abc.Set` \
634 [ `frozenset` [ `str` ] ], optional
635 A set of sets of dimension names representing relationships between
636 dimensions encoded in the rows of ``initial_relation``. If not
637 provided (and ``initial_relation`` is),
638 `extract_dimension_relationships` will be called on
639 ``initial_relation``.
640 spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
641 Iterable of dimension element name pairs that should be spatially
642 joined.
643 governor_constraints : `~collections.abc.Mapping` [ `str`, \
644 `~collections.abc.Set` [ `str` ] ]
645 Constraints on governor dimensions that are provided by other parts
646 of the query that either have been included in ``initial_relation``
647 or are guaranteed to be added in the future. This is a mapping from
648 governor dimension name to sets of values that dimension may take.
650 Returns
651 -------
652 relation : `lsst.daf.relation.Relation`
653 Relation containing the given dimension columns and constraints.
654 """
655 raise NotImplementedError()
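# Usage sketch (hypothetical dimension and element names): a relation over
# visit-detector regions and skymap patches, spatially joined and constrained
# to a single instrument and skymap.
#
#     relation = backend.make_dimension_relation(
#         backend.universe.conform(["visit", "detector", "patch"]),
#         columns=set(),
#         context=context,
#         spatial_joins=[("visit_detector_region", "patch")],
#         governor_constraints={
#             "instrument": {"HSC"},
#             "skymap": {"hsc_rings_v1"},
#         },
#     )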
657 @abstractmethod
658 def resolve_governor_constraints(
659 self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]]
660 ) -> Mapping[str, Set[str]]:
661 """Resolve governor dimension constraints provided by user input to
662 a query against the content in the `Registry`.
664 Parameters
665 ----------
666 dimensions : `DimensionGroup`
667 Dimensions that bound the governor dimensions to consider (via
668 ``dimensions.governors``, more specifically).
669 constraints : `~collections.abc.Mapping` [ `str`, \
670 `~collections.abc.Set` [ `str` ] ]
671 Constraints from user input to the query (e.g. from data IDs and
672 string expression predicates).
674 Returns
675 -------
676 resolved : `~collections.abc.Mapping` [ `str`, \
677 `~collections.abc.Set` [ `str` ] ]
678 A shallow copy of ``constraints`` with keys equal to
679 ``dimensions.governors.names`` and value sets constrained by the
680 Registry content if they were not already in ``constraints``.
682 Raises
683 ------
684 DataIdValueError
685 Raised if ``constraints`` includes governor dimension values that
686 are not present in the `Registry`.
687 """
688 raise NotImplementedError()
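# Sketch (hypothetical values): with governors {"instrument", "skymap"} and a
# user constraint only on instrument, the result also pins skymap to whatever
# values the Registry actually contains, for example
#
#     {"instrument": {"HSC"}}
#         -> {"instrument": {"HSC"}, "skymap": {"hsc_rings_v1"}}
#
# while an unknown instrument value would raise DataIdValueError.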
690 @abstractmethod
691 def get_dimension_record_cache(self, element_name: str) -> DimensionRecordSet | None:
692 """Return a local cache of all `DimensionRecord` objects for a
693 dimension element, fetching it if necessary.
695 Parameters
696 ----------
697 element_name : `str`
698 Name of the dimension element.
700 Returns
701 -------
702 cache : `DimensionRecordSet` or `None`
703 Set of all `DimensionRecord` objects for this dimension
704 element, or `None` if this element's records are never
705 cached.
706 """
707 raise NotImplementedError()
709 def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
710 """Extract the dimension key relationships encoded in a relation tree.
712 Parameters
713 ----------
714 relation : `Relation`
715 Relation tree to process.
717 Returns
718 -------
719 relationships : `set` [ `frozenset` [ `str` ] ]
720 Set of sets of dimension names, where each inner set represents a
721 relationship between dimensions.
723 Notes
724 -----
725 Dimension relationships include both many-to-one implied dependencies
726 and many-to-many joins backed by "always-join" dimension elements, and
727 it's important to join in the dimension table that defines a
728 relationship in any query involving dimensions that are a superset of
729 that relationship. For example, let's consider a relation tree that
730 joins dataset existence-check relations for two dataset types, with
731 dimensions ``{instrument, exposure, detector}`` and ``{instrument,
732 physical_filter}``. The joined relation appears to have all dimension
733 keys in its expanded graph present except ``band``, and the system
734 could easily correct this by joining that dimension in directly. But
735 it's also missing the ``{instrument, exposure, physical_filter}``
736 relationship we'd get from the ``exposure`` dimension's own relation
737 (``exposure`` implies ``physical_filter``) and the similar
738 ``{instrument, physical_filter, band}`` relationship from the
739 ``physical_filter`` dimension relation; we need the relationship logic
740 to recognize that those dimensions need to be joined in as well in
741 order for the full relation to have rows that represent valid data IDs.
743 The implementation of this method relies on the assumption that
744 `LeafRelation` objects always have rows that are consistent with all
745 defined relationships (i.e. are valid data IDs). This is true not
746 just for dimension relations themselves, but for anything created from
747 queries based on them, including datasets and query results. It is
748 possible to construct `LeafRelation` objects that don't satisfy this
749 criterion (e.g. when accepting user-provided data IDs), and in this
750 case higher-level guards or warnings must be provided.
751 """
752 return {
753 frozenset(
754 tag.dimension
755 for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
756 )
757 for leaf_relation in self._extract_leaf_relations(relation).values()
758 }
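# Worked example for the Notes above (assumed dimension schema): joining
# dataset relations with dimensions {instrument, exposure, detector} and
# {instrument, physical_filter} yields relationships like
#
#     {
#         frozenset({"instrument", "exposure", "detector"}),
#         frozenset({"instrument", "physical_filter"}),
#     }
#
# The absence of {"instrument", "exposure", "physical_filter"} is what tells
# the caller that the ``exposure`` dimension relation still needs to be
# joined in.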
760 def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
761 """Recursively extract leaf relations from a relation tree.
763 Parameters
764 ----------
765 relation : `Relation`
766 Tree to process.
768 Returns
769 -------
770 leaves : `dict` [ `str`, `LeafRelation` ]
771 Leaf relations, keyed and deduplicated by name.
772 """
773 match relation:
774 case LeafRelation() as leaf:
775 return {leaf.name: leaf}
776 case UnaryOperationRelation(target=target):
777 return self._extract_leaf_relations(target)
778 case BinaryOperationRelation(lhs=lhs, rhs=rhs):
779 return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
780 case MarkerRelation(target=target):
781 return self._extract_leaf_relations(target)
782 raise AssertionError("Match should be exhaustive and all branches should return.")