Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 38%
109 statements
coverage.py v7.3.2, created at 2023-12-05 11:07 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import _timespan

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Predicate,
    Relation,
    UnaryOperationRelation,
)

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_type import DatasetType
from ...dimensions import DataCoordinate, DimensionGroup, DimensionRecord, DimensionUniverse
from .._collection_type import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete registry
    type, and most subclasses will be paired with a `QueryContext` subclass.
    See `QueryContext` for the division of responsibilities between these two
    interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()
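
    # Illustrative sketch (not part of the original class): one way a caller
    # might use `resolve_collection_wildcard`. The collection names and the
    # RUN-only restriction here are hypothetical.
    def _example_resolve_collections(self) -> list[str]:
        # Resolve a list of literal names, keeping only RUN-type collections
        # and flattening any CHAINED collections into their children.
        done: set[str] = set()
        records = self.resolve_collection_wildcard(
            ["HSC/raw/all", "HSC/defaults"],
            collection_types={CollectionType.RUN},
            done=done,
            flatten_chains=True,
        )
        # Each record carries the collection's name, type, and primary key.
        return [record.name for record in records]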

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates the
            parent composite dataset type was matched.
        """
        raise NotImplementedError()

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> tuple[DatasetType, list[str | None]]:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        single_parent : `DatasetType`
            The matched parent dataset type.
        single_components : `list` [ `str` | `None` ]
            The matched components that correspond to this parent, or `None` if
            the parent dataset type itself was matched.

        Notes
        -----
        This method really finds a single parent dataset type and any number of
        components, because it's only the parent dataset type that's known to
        registry at all; many callers are expected to discard the
        ``single_components`` return value.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression,
            components=components,
            missing=missing,
            explicit_only=explicit_only,
            components_deprecated=components_deprecated,
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        ((single_parent, single_components),) = matching.items()
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent, single_components
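
    # Illustrative sketch (not part of the original class): typical use of
    # `resolve_single_dataset_type_wildcard` when only the parent dataset type
    # is needed. The dataset type name "flat" is hypothetical.
    def _example_resolve_single_dataset_type(self) -> DatasetType:
        # A plain string name resolves to exactly one registered parent
        # dataset type; the matched-components list is often discarded.
        parent, _components = self.resolve_single_dataset_type_wildcard("flat")
        return parent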

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` but is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
                `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` but is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
                optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If collection name was provided explicitly then raise,
                    # since this is a kind of query we don't support yet;
                    # otherwise collection is a part of a chained one or regex
                    # match, and we skip it to not break queries of other
                    # included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records
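
    # Illustrative sketch (not part of the original class): how a query driver
    # might call `resolve_dataset_collections`. The collection expression and
    # governor constraint below are hypothetical.
    def _example_resolve_dataset_collections(self, dataset_type: DatasetType) -> list[CollectionRecord]:
        rejections: list[str] = []
        records = self.resolve_dataset_collections(
            dataset_type,
            CollectionWildcard.from_expression("HSC/defaults"),
            governor_constraints={"instrument": {"HSC"}},
            rejections=rejections,
        )
        # When `records` comes back empty, `rejections` holds at least one
        # human-readable message explaining why; callers typically forward
        # those messages to `make_doomed_dataset_relation`.
        return records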

    @abstractmethod
    def _make_dataset_query_relation_impl(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.

        Notes
        -----
        This method must be implemented by derived classes but is not
        responsible for joining the resulting relation to an existing relation.
        """
        raise NotImplementedError()

    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        # If we need to do a temporal join to a calibration collection, we need
        # to include the timespan column in the base query and prepare the join
        # predicate.
        join_predicates: list[Predicate] = []
        base_timespan_tag: ColumnTag | None = None
        full_columns: set[str] = set(columns)
        if (
            temporal_join_on
            and join_to is not None
            and any(r.type is CollectionType.CALIBRATION for r in collections)
        ):
            base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
            rhs = ColumnExpression.reference(base_timespan_tag, dtype=_timespan.Timespan)
            full_columns.add("timespan")
            for timespan_tag in temporal_join_on:
                lhs = ColumnExpression.reference(timespan_tag, dtype=_timespan.Timespan)
                join_predicates.append(lhs.predicate_method("overlaps", rhs))
        # Delegate to the concrete QueryBackend subclass to do most of the
        # work.
        result = self._make_dataset_query_relation_impl(
            dataset_type,
            collections,
            full_columns,
            context=context,
        )
        if join_to is not None:
            result = join_to.join(
                result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
            )
            if join_predicates and "timespan" not in columns:
                # Drop the timespan column we added for the join only if the
                # timespan wasn't requested in its own right.
                result = result.with_only_columns(result.columns - {base_timespan_tag})
        return result
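
    # Illustrative sketch (not part of the original class): building a dataset
    # query relation joined to an existing relation on a temporal overlap.
    # `data_id_relation` is assumed to already carry the timespan column
    # identified by `timespan_tag`; the requested dataset columns are
    # hypothetical.
    def _example_dataset_query_with_temporal_join(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        data_id_relation: Relation,
        timespan_tag: ColumnTag,
        context: _C,
    ) -> Relation:
        # The temporal join only takes effect when `collections` includes a
        # CALIBRATION collection; otherwise it is ignored, as documented above.
        return self.make_dataset_query_relation(
            dataset_type,
            collections,
            {"dataset_id", "run"},
            context,
            join_to=data_id_relation,
            temporal_join_on={timespan_tag},
        )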

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each data
        ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the ``relation``. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
            join_to=join_to,
            temporal_join_on=temporal_join_on,
        )
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGroup.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.conform(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})
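
    # Illustrative sketch (not part of the original class): a find-first
    # dataset search over an ordered list of collections, as a query driver
    # might issue it. The requested columns are hypothetical.
    def _example_find_first_search(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        context: _C,
    ) -> Relation:
        # The returned relation yields at most one dataset per data ID: the
        # one from the earliest collection in `collections` that contains it.
        return self.make_dataset_search_relation(
            dataset_type,
            collections,
            {"dataset_id", "run"},
            context,
        )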

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `~collections.abc.Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
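
    # Illustrative sketch (not part of the original class): falling back to a
    # doomed relation when collection resolution leaves nothing to search.
    # The requested column set is hypothetical.
    def _example_search_or_doomed(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        context: _C,
    ) -> Relation:
        rejections: list[str] = []
        records = self.resolve_dataset_collections(
            dataset_type,
            collections,
            governor_constraints={},
            rejections=rejections,
        )
        if not records:
            # Propagate the diagnostic messages so the empty result can
            # explain itself when the query is executed.
            return self.make_doomed_dataset_relation(dataset_type, {"dataset_id"}, rejections, context)
        return self.make_dataset_search_relation(dataset_type, records, {"dataset_id"}, context)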

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGroup,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
                [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGroup, constraints: Mapping[str, Set[str]], context: _C
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).
        context : `QueryContext`
            Object that manages state for the query; used here to fetch the
            governor dimension record cache if it has not already been loaded.

        Returns
        -------
        resolved : `~collections.abc.Mapping` [ `str`, \
                `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_dimension_record_cache(
        self, element_name: str, context: _C
    ) -> Mapping[DataCoordinate, DimensionRecord] | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.
        context : `QueryContext`
            Context to be used to execute queries when no cached result is
            available.

        Returns
        -------
        cache : `~collections.abc.Mapping` [ `DataCoordinate`, \
                `DimensionRecord` ] or `None`
            Mapping from data ID to dimension record, or `None` if this
            element's records are never cached.
        """
        raise NotImplementedError()

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true not just
        for dimension relations themselves, but for anything created from
        queries based on them, including datasets and query results. It is
        possible to construct `LeafRelation` objects that don't satisfy this
        criterion (e.g. when accepting user-provided data IDs), and in this
        case higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }
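
    # Illustrative sketch (not part of the original class): the shape of the
    # result for the example in the docstring above, assuming `joined` was
    # built from two leaf relations with dimensions
    # {instrument, exposure, detector} and {instrument, physical_filter}.
    def _example_dimension_relationships(self, joined: Relation) -> set[frozenset[str]]:
        relationships = self.extract_dimension_relationships(joined)
        # Expected (hypothetical) contents for that tree:
        #   {
        #       frozenset({"instrument", "exposure", "detector"}),
        #       frozenset({"instrument", "physical_filter"}),
        #   }
        # Neither inner set is the {instrument, exposure, physical_filter} or
        # {instrument, physical_filter, band} relationship, so
        # `make_dimension_relation` would still need to join in the `exposure`
        # and `physical_filter` dimension relations.
        return relationships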

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")
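

# Illustrative sketch (not part of the original module): the typical flow a
# registry query driver follows with a concrete `QueryBackend`. The dataset
# type, collection expression, and requested columns are hypothetical, and how
# rows are ultimately fetched from the relation is up to the `QueryContext`
# implementation.
def _example_query_flow(backend: QueryBackend[QueryContext], dataset_type: DatasetType) -> Relation:
    with backend.context() as context:
        rejections: list[str] = []
        records = backend.resolve_dataset_collections(
            dataset_type,
            CollectionWildcard.from_expression(["HSC/defaults"]),
            governor_constraints={"instrument": {"HSC"}},
            rejections=rejections,
        )
        if not records:
            # No searchable collections: build an empty relation that carries
            # the diagnostic messages instead of failing outright.
            return backend.make_doomed_dataset_relation(dataset_type, {"dataset_id"}, rejections, context)
        # Otherwise build a find-first search over the resolved collections.
        return backend.make_dataset_search_relation(dataset_type, records, {"dataset_id"}, context)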