Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 39%
112 statements
coverage.py v7.4.1, created at 2024-02-13 10:57 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import _timespan

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Predicate,
    Relation,
    UnaryOperationRelation,
)

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_type import DatasetType
from ...dimensions import DimensionGroup, DimensionRecordSet, DimensionUniverse
from .._collection_type import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete registry
    type, and most subclasses will be paired with a `QueryContext` subclass.
    See `QueryContext` for the division of responsibilities between these two
    interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    @abstractmethod
    def caching_context(self) -> AbstractContextManager[None]:
        """Enable caching of collection records and summaries for the duration
        of the returned context manager.
        """
        raise NotImplementedError()

    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()
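
    # A minimal usage sketch (illustrative only; ``backend``, ``bias_type``,
    # and ``collection_records`` are hypothetical stand-ins, not names defined
    # in this module):
    #
    #     with backend.context() as ctx:
    #         relation = backend.make_dataset_query_relation(
    #             bias_type, collection_records, {"dataset_id", "run"}, ctx
    #         )
    #         # ``relation`` can then be executed or further composed while
    #         # ``ctx`` holds the per-query state and connections.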

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key : `~typing.Any`
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default), recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        missing: list[str] | None = None,
        explicit_only: bool = False,
    ) -> list[DatasetType]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.

        Returns
        -------
        dataset_types : `list` [ `DatasetType` ]
            A list of resolved dataset types.
        """
        raise NotImplementedError()

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        explicit_only: bool = False,
    ) -> DatasetType:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression : `~typing.Any`
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.

        Returns
        -------
        single : `DatasetType`
            The matched dataset type.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression, missing=missing, explicit_only=explicit_only
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        (single_parent,) = matching
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent
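
    # Illustrative sketch of the behavior above (hypothetical expressions and
    # names; the exact error text lives in the method body):
    #
    #     backend.resolve_single_dataset_type_wildcard("bias")
    #     # -> the registered DatasetType named "bias"
    #     backend.resolve_single_dataset_type_wildcard(re.compile("flat.*"))
    #     # -> raises DatasetTypeError if the pattern matches more than one
    #     #    registered type, MissingDatasetTypeError if it matches none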

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
                `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()
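
    # Sketch of the expected return shape (hypothetical dataset types and
    # collection records):
    #
    #     backend.filter_dataset_collections(
    #         [bias_type, flat_type],
    #         [run_a, run_b],
    #         governor_constraints={"instrument": {"HSC"}},
    #     )
    #     # -> {bias_type: [run_a, run_b], flat_type: [run_b]}
    #     # i.e. collections whose summaries say they cannot contain a given
    #     # dataset type (or that violate the constraints) are dropped.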

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
                optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If collection name was provided explicitly then raise,
                    # since this is a kind of query we don't support yet;
                    # otherwise collection is a part of a chained one or regex
                    # match, and we skip it to not break queries of other
                    # included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                        supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records
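
    # Illustrative call (hypothetical names; ``rejections`` collects
    # human-readable reasons for any collections that were dropped):
    #
    #     rejections: list[str] = []
    #     records = backend.resolve_dataset_collections(
    #         bias_type,
    #         CollectionWildcard.from_expression(["HSC/calib", "HSC/runs/a"]),
    #         governor_constraints={"instrument": {"HSC"}},
    #         rejections=rejections,
    #     )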

    @abstractmethod
    def _make_dataset_query_relation_impl(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.

        Notes
        -----
        This method must be implemented by derived classes but is not
        responsible for joining the resulting relation to an existing relation.
        """
        raise NotImplementedError()

    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        # If we need to do a temporal join to a calibration collection, we need
        # to include the timespan column in the base query and prepare the join
        # predicate.
        join_predicates: list[Predicate] = []
        base_timespan_tag: ColumnTag | None = None
        full_columns: set[str] = set(columns)
        if (
            temporal_join_on
            and join_to is not None
            and any(r.type is CollectionType.CALIBRATION for r in collections)
        ):
            base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
            rhs = ColumnExpression.reference(base_timespan_tag, dtype=_timespan.Timespan)
            full_columns.add("timespan")
            for timespan_tag in temporal_join_on:
                lhs = ColumnExpression.reference(timespan_tag, dtype=_timespan.Timespan)
                join_predicates.append(lhs.predicate_method("overlaps", rhs))
        # Delegate to the concrete QueryBackend subclass to do most of the
        # work.
        result = self._make_dataset_query_relation_impl(
            dataset_type,
            collections,
            full_columns,
            context=context,
        )
        if join_to is not None:
            result = join_to.join(
                result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
            )
            if join_predicates and "timespan" not in columns:
                # Drop the timespan column we added for the join only if the
                # timespan wasn't requested in its own right.
                result = result.with_only_columns(result.columns - {base_timespan_tag})
        return result
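
    # Sketch of a temporal-join call (hypothetical names and tags, not defined
    # in this module): when ``collections`` includes a CALIBRATION collection,
    # each calibration dataset's timespan is required to overlap the timespan
    # columns named in ``temporal_join_on``, which must already be present in
    # ``join_to``:
    #
    #     relation = backend.make_dataset_query_relation(
    #         bias_type,
    #         collection_records,
    #         {"dataset_id", "run"},
    #         ctx,
    #         join_to=visit_relation,
    #         temporal_join_on={visit_timespan_tag},
    #     )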

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each data
        ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched for.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the ``relation``. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
            join_to=join_to,
            temporal_join_on=temporal_join_on,
        )
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGroup.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.conform(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})
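
    # Find-first sketch (hypothetical names): search collections in priority
    # order and keep only the highest-priority match for each data ID:
    #
    #     relation = backend.make_dataset_search_relation(
    #         bias_type,
    #         collection_records,  # e.g. resolved from ["run_new", "run_old"]
    #         {"dataset_id", "run"},
    #         ctx,
    #     )
    #
    # Internally this adds a "rank" column encoding each collection's position
    # in the search path, applies FindFirstDataset to keep the first-ranked
    # match per data ID, and then drops the rank column from the result.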

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `~collections.abc.Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
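
    # Sketch (hypothetical names): a doomed relation carries the requested
    # columns and the diagnostic messages, but never yields any rows:
    #
    #     relation = backend.make_doomed_dataset_relation(
    #         bias_type,
    #         {"dataset_id"},
    #         messages=["No collections matched the given expression."],
    #         context=ctx,
    #     )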

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGroup,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
                [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]]
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).

        Returns
        -------
        resolved : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()
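
    # Sketch of the expected behavior (hypothetical values), assuming
    # ``instrument`` is the only governor dimension in ``dimensions``:
    #
    #     backend.resolve_governor_constraints(dimensions, {"instrument": {"HSC"}})
    #     # -> {"instrument": {"HSC"}}   (validated against registry content)
    #     backend.resolve_governor_constraints(dimensions, {})
    #     # -> {"instrument": <all instrument values known to the registry>}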

    @abstractmethod
    def get_dimension_record_cache(self, element_name: str) -> DimensionRecordSet | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.

        Returns
        -------
        cache : `DimensionRecordSet` or `None`
            All records for this element, or `None` if this element's records
            are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true not just
        for dimension relations themselves, but for anything created from
        queries based on them, including datasets and query results. It is
        possible to construct `LeafRelation` objects that don't satisfy this
        criterion (e.g. when accepting user-provided data IDs), and in this
        case higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }
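
    # Worked sketch of the Notes above (hypothetical leaf relations): joining a
    # leaf with dimensions {instrument, exposure, detector} to one with
    # {instrument, physical_filter} yields relationships such as
    #
    #     {
    #         frozenset({"instrument", "exposure", "detector"}),
    #         frozenset({"instrument", "physical_filter"}),
    #     }
    #
    # which tells the caller that the {instrument, exposure, physical_filter}
    # and {instrument, physical_filter, band} relationships are not yet
    # represented, so the corresponding dimension relations still need to be
    # joined in.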

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")