Coverage for python/lsst/daf/butler/registry/queries/_query_backend.py: 36% (106 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ("QueryBackend",)

from abc import abstractmethod
from collections.abc import Iterable, Mapping, Sequence, Set
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from lsst.daf.relation import (
    BinaryOperationRelation,
    ColumnExpression,
    ColumnTag,
    LeafRelation,
    MarkerRelation,
    Predicate,
    Relation,
    UnaryOperationRelation,
)

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetType,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
    DimensionUniverse,
    timespan,
)
from .._collectionType import CollectionType
from .._exceptions import DatasetTypeError, MissingDatasetTypeError
from ..wildcards import CollectionWildcard
from ._query_context import QueryContext
from .find_first_dataset import FindFirstDataset

if TYPE_CHECKING:
    from ..interfaces import CollectionRecord


_C = TypeVar("_C", bound=QueryContext)


class QueryBackend(Generic[_C]):
    """An interface for constructing and evaluating the
    `~lsst.daf.relation.Relation` objects that comprise registry queries.

    This ABC is expected to have a concrete subclass for each concrete registry
    type, and most subclasses will be paired with a `QueryContext` subclass.
    See `QueryContext` for the division of responsibilities between these two
    interfaces.
    """

    @property
    @abstractmethod
    def universe(self) -> DimensionUniverse:
        """Definition of all dimensions and dimension elements for this
        registry (`DimensionUniverse`).
        """
        raise NotImplementedError()

    def context(self) -> _C:
        """Return a context manager that can be used to execute queries with
        this backend.

        Returns
        -------
        context : `QueryContext`
            Context manager that manages state and connections needed to
            execute queries.
        """
        raise NotImplementedError()
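
    # Illustrative usage sketch (not part of the original source; ``backend``,
    # ``dimensions``, and ``columns`` are hypothetical): the context manager
    # returned here is what the relation-building methods below expect as
    # their ``context`` argument.
    #
    #     with backend.context() as context:
    #         relation = backend.make_dimension_relation(
    #             dimensions, columns, context, governor_constraints={}
    #         )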

    @abstractmethod
    def get_collection_name(self, key: Any) -> str:
        """Return the collection name associated with a collection primary key
        value.

        Parameters
        ----------
        key
            Collection primary key value.

        Returns
        -------
        name : `str`
            Collection name.
        """
        raise NotImplementedError()

    @abstractmethod
    def resolve_collection_wildcard(
        self,
        expression: Any,
        *,
        collection_types: Set[CollectionType] = CollectionType.all(),
        done: set[str] | None = None,
        flatten_chains: bool = True,
        include_chains: bool | None = None,
    ) -> list[CollectionRecord]:
        """Return the collection records that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for collections; will be passed to
            `CollectionWildcard.from_expression`.
        collection_types : `collections.abc.Set` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set` [ `str` ], optional
            A set of collection names that should be skipped, updated to
            include all processed collection names on return.
        flatten_chains : `bool`, optional
            If `True` (default), recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        include_chains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flatten_chains``: either return records for CHAINED collections
            or their children, but not both.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            Matching collection records.
        """
        raise NotImplementedError()
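
    # Illustrative sketch (not part of the original source; the collection
    # names and ``backend`` are hypothetical):
    #
    #     records = backend.resolve_collection_wildcard(
    #         ["HSC/raw/all", "HSC/calib*"],
    #         collection_types={CollectionType.RUN, CollectionType.CALIBRATION},
    #     )
    #     names = [record.name for record in records]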

    @abstractmethod
    def resolve_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        missing: list[str] | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> dict[DatasetType, list[str | None]]:
        """Return the dataset types that match a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for dataset types; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        missing : `list` of `str`, optional
            String dataset type names that were explicitly given (i.e. not
            regular expression patterns) but not found will be appended to this
            list, if it is provided.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        dataset_types : `dict` [ `DatasetType`, `list` [ `str` | `None` ] ]
            A mapping with resolved dataset types as keys and lists of
            matched component names as values, where `None` indicates the
            parent composite dataset type was matched.
        """
        raise NotImplementedError()
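
    # Illustrative sketch (not part of the original source; the pattern and
    # ``backend`` are hypothetical):
    #
    #     matched = backend.resolve_dataset_type_wildcard(
    #         re.compile("deepCoadd.*"), components=False
    #     )
    #     for dataset_type, component_names in matched.items():
    #         ...  # `None` in component_names means the parent itself matched.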

    def resolve_single_dataset_type_wildcard(
        self,
        expression: Any,
        components: bool | None = None,
        explicit_only: bool = False,
        components_deprecated: bool = True,
    ) -> tuple[DatasetType, list[str | None]]:
        """Return a single dataset type that matches a wildcard expression.

        Parameters
        ----------
        expression
            Names and/or patterns for the dataset type; will be passed to
            `DatasetTypeWildcard.from_expression`.
        components : `bool`, optional
            If `True`, apply all expression patterns to component dataset type
            names as well. If `False`, never apply patterns to components. If
            `None` (default), apply patterns to components only if their parent
            datasets were not matched by the expression. Fully-specified
            component datasets (`str` or `DatasetType` instances) are always
            included.
        explicit_only : `bool`, optional
            If `True`, require explicit `DatasetType` instances or `str` names,
            with `re.Pattern` instances deprecated and ``...`` prohibited.
        components_deprecated : `bool`, optional
            If `True`, this is a context in which component dataset support is
            deprecated. This will result in a deprecation warning when
            ``components=True`` or ``components=None`` and a component dataset
            is matched. In the future this will become an error.

        Returns
        -------
        single_parent : `DatasetType`
            The matched parent dataset type.
        single_components : `list` [ `str` | `None` ]
            The matched components that correspond to this parent, or `None` if
            the parent dataset type itself was matched.

        Notes
        -----
        This method really finds a single parent dataset type and any number of
        components, because it's only the parent dataset type that's known to
        registry at all; many callers are expected to discard the
        ``single_components`` return value.
        """
        missing: list[str] = []
        matching = self.resolve_dataset_type_wildcard(
            expression,
            components=components,
            missing=missing,
            explicit_only=explicit_only,
            components_deprecated=components_deprecated,
        )
        if not matching:
            if missing:
                raise MissingDatasetTypeError(
                    "\n".join(
                        f"Dataset type {t!r} is not registered, so no instances of it can exist."
                        for t in missing
                    )
                )
            else:
                raise MissingDatasetTypeError(
                    f"No registered dataset types matched expression {expression!r}, "
                    "so no datasets will be found."
                )
        if len(matching) > 1:
            raise DatasetTypeError(
                f"Expression {expression!r} matched multiple parent dataset types: "
                f"{[t.name for t in matching]}, but only one is allowed."
            )
        ((single_parent, single_components),) = matching.items()
        if missing:
            raise DatasetTypeError(
                f"Expression {expression!r} appears to involve multiple dataset types, even though only "
                f"one ({single_parent.name}) is registered, and only one is allowed here."
            )
        return single_parent, single_components
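
    # Illustrative sketch (not part of the original source; the dataset type
    # name and ``backend`` are hypothetical): a component expression resolves
    # to its parent dataset type plus the matched component name.
    #
    #     parent, components = backend.resolve_single_dataset_type_wildcard(
    #         "calexp.wcs", components=True
    #     )
    #     # ``parent`` is the registered parent type; ``components`` holds the
    #     # matched component name(s).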

    @abstractmethod
    def filter_dataset_collections(
        self,
        dataset_types: Iterable[DatasetType],
        collections: Sequence[CollectionRecord],
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
    ) -> dict[DatasetType, list[CollectionRecord]]:
        """Filter a sequence of collections to those for which a dataset query
        might succeed.

        Parameters
        ----------
        dataset_types : `~collections.abc.Iterable` [ `DatasetType` ]
            Dataset types that are being queried. Must include only parent
            or standalone dataset types, not components.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Sequence of collections that will be searched.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.

        Returns
        -------
        dataset_collections : `dict` [ `DatasetType`, \
            `list` [ `CollectionRecord` ] ]
            The collections to search for each dataset. The dictionary's keys
            are always exactly ``dataset_types`` (in the same order), and each
            nested `list` of collections is ordered consistently with the
            given ``collections``.

        Notes
        -----
        This method accepts multiple dataset types and multiple collections at
        once to enable implementations to batch up the fetching of summary
        information needed to relate them.
        """
        raise NotImplementedError()
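
    # Illustrative sketch (not part of the original source; all names are
    # hypothetical): batching several dataset types against one collection
    # search path lets an implementation fetch collection summaries only once.
    #
    #     rejections: list[str] = []
    #     by_type = backend.filter_dataset_collections(
    #         [raw_type, calexp_type],
    #         collection_records,
    #         governor_constraints={"instrument": {"HSC"}},
    #         rejections=rejections,
    #     )
    #     calexp_collections = by_type[calexp_type]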

    def resolve_dataset_collections(
        self,
        dataset_type: DatasetType,
        collections: CollectionWildcard,
        *,
        governor_constraints: Mapping[str, Set[str]],
        rejections: list[str] | None = None,
        collection_types: Set[CollectionType] = CollectionType.all(),
        allow_calibration_collections: bool = False,
    ) -> list[CollectionRecord]:
        """Resolve the sequence of collections to query for a dataset type.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type to be queried in the returned collections.
        collections : `CollectionWildcard`
            Expression for the collections to be queried.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` ], optional
            Constraints imposed by other aspects of the query on governor
            dimensions; collections inconsistent with these constraints will be
            skipped.
        rejections : `list` [ `str` ], optional
            If not `None`, a `list` that diagnostic messages will be appended
            to, for any collection that matches ``collections`` that is not
            returned. At least one message is guaranteed whenever the result
            is empty.
        collection_types : `~collections.abc.Set` [ `CollectionType` ], \
            optional
            Collection types to consider when resolving the collection
            expression.
        allow_calibration_collections : `bool`, optional
            If `False`, skip (with a ``rejections`` message) any calibration
            collections that match ``collections`` but are not given explicitly
            by name, and raise `NotImplementedError` for any calibration
            collection that is given explicitly. This is a temporary option
            that will be removed when the query system can handle temporal
            joins involving calibration collections.

        Returns
        -------
        records : `list` [ `CollectionRecord` ]
            A new list of `CollectionRecord` instances, for collections that
            both match ``collections`` and may have datasets of the given type.

        Notes
        -----
        This is a higher-level driver for `resolve_collection_wildcard` and
        `filter_dataset_collections` that is mostly concerned with handling
        queries against `~CollectionType.CALIBRATION` collections that aren't
        fully supported yet. Once that support improves, this method may be
        removed.
        """
        if collections == CollectionWildcard() and collection_types == CollectionType.all():
            collection_types = {CollectionType.RUN}
        explicit_collections = frozenset(collections.strings)
        matching_collection_records = self.resolve_collection_wildcard(
            collections, collection_types=collection_types
        )
        ((_, filtered_collection_records),) = self.filter_dataset_collections(
            [dataset_type],
            matching_collection_records,
            governor_constraints=governor_constraints,
            rejections=rejections,
        ).items()
        if not allow_calibration_collections:
            supported_collection_records: list[CollectionRecord] = []
            for record in filtered_collection_records:
                if record.type is CollectionType.CALIBRATION:
                    # If collection name was provided explicitly then raise,
                    # since this is a kind of query we don't support yet;
                    # otherwise collection is a part of a chained one or regex
                    # match, and we skip it to not break queries of other
                    # included collections.
                    if record.name in explicit_collections:
                        raise NotImplementedError(
                            f"Query for dataset type {dataset_type.name!r} in CALIBRATION-type "
                            f"collection {record.name!r} is not yet supported."
                        )
                    else:
                        if rejections is not None:
                            rejections.append(
                                f"Not searching for dataset {dataset_type.name!r} in CALIBRATION "
                                f"collection {record.name!r} because calibration queries aren't fully "
                                "implemented; this is not an error only because the query structure "
                                "implies that searching this collection may be incidental."
                            )
                        supported_collection_records.append(record)
                else:
                    supported_collection_records.append(record)
        else:
            supported_collection_records = filtered_collection_records
        if not supported_collection_records and rejections is not None and not rejections:
            rejections.append(f"No collections to search matching expression {collections!r}.")
        return supported_collection_records
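
    # Illustrative sketch (not part of the original source; the constraint
    # values and ``backend``/``bias_type`` are hypothetical): resolve which
    # collections are worth searching for a dataset type, collecting
    # diagnostics for the ones that are skipped.
    #
    #     rejections: list[str] = []
    #     records = backend.resolve_dataset_collections(
    #         bias_type,
    #         CollectionWildcard.from_expression(...),
    #         governor_constraints={"instrument": {"LATISS"}},
    #         rejections=rejections,
    #     )
    #     if not records:
    #         print("\n".join(rejections))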

    @abstractmethod
    def _make_dataset_query_relation_impl(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.

        Notes
        -----
        This method must be implemented by derived classes but is not
        responsible for joining the resulting relation to an existing relation.
        """
        raise NotImplementedError()

    def make_dataset_query_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an unordered query for datasets
        that returns matching results from all given collections.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being queried.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to query. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        context : `QueryContext`
            Context that manages per-query state.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the relation. See `Query.find_datasets` for
            details.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a dataset query.
        """
        # If we need to do a temporal join to a calibration collection, we need
        # to include the timespan column in the base query and prepare the join
        # predicate.
        join_predicates: list[Predicate] = []
        base_timespan_tag: ColumnTag | None = None
        full_columns: set[str] = set(columns)
        if (
            temporal_join_on
            and join_to is not None
            and any(r.type is CollectionType.CALIBRATION for r in collections)
        ):
            base_timespan_tag = DatasetColumnTag(dataset_type.name, "timespan")
            rhs = ColumnExpression.reference(base_timespan_tag, dtype=timespan.Timespan)
            full_columns.add("timespan")
            for timespan_tag in temporal_join_on:
                lhs = ColumnExpression.reference(timespan_tag, dtype=timespan.Timespan)
                join_predicates.append(lhs.predicate_method("overlaps", rhs))
        # Delegate to the concrete QueryBackend subclass to do most of the
        # work.
        result = self._make_dataset_query_relation_impl(
            dataset_type,
            collections,
            full_columns,
            context=context,
        )
        if join_to is not None:
            result = join_to.join(
                result, predicate=Predicate.logical_and(*join_predicates) if join_predicates else None
            )
            if join_predicates and "timespan" not in columns:
                # Drop the timespan column we added for the join only if the
                # timespan wasn't requested in its own right.
                result = result.with_only_columns(result.columns - {base_timespan_tag})
        return result
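
    # Illustrative sketch (not part of the original source; ``backend``,
    # ``records``, ``data_id_relation``, and ``exposure_timespan_tags`` are
    # hypothetical): a temporal join against calibration collections needs
    # timespan column tags that are already present in ``join_to``.
    #
    #     relation = backend.make_dataset_query_relation(
    #         bias_type,
    #         records,
    #         {"dataset_id", "run"},
    #         context,
    #         join_to=data_id_relation,
    #         temporal_join_on=exposure_timespan_tags,
    #     )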

    def make_dataset_search_relation(
        self,
        dataset_type: DatasetType,
        collections: Sequence[CollectionRecord],
        columns: Set[str],
        context: _C,
        *,
        join_to: Relation | None = None,
        temporal_join_on: Set[ColumnTag] = frozenset(),
    ) -> Relation:
        """Construct a relation that represents an ordered query for datasets
        that returns results from the first matching collection for each data
        ID.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Type for the datasets being searched.
        collections : `~collections.abc.Sequence` [ `CollectionRecord` ]
            Records for collections to search. Should generally be the result
            of a call to `resolve_dataset_collections`, and must not be empty.
        columns : `~collections.abc.Set` [ `str` ]
            Columns to include in the ``relation``. See
            `make_dataset_query_relation` for options.
        context : `QueryContext`
            Context that manages per-query state.
        join_to : `Relation`, optional
            Another relation to join with the query for datasets in all
            collections before filtering out shadowed datasets.
        temporal_join_on : `~collections.abc.Set` [ `ColumnTag` ], optional
            Timespan columns in ``join_to`` that calibration dataset timespans
            must overlap. Must already be present in ``join_to``. Ignored if
            ``join_to`` is `None` or if there are no calibration collections.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation representing a find-first dataset search.
        """
        base = self.make_dataset_query_relation(
            dataset_type,
            collections,
            columns | {"rank"},
            context=context,
            join_to=join_to,
            temporal_join_on=temporal_join_on,
        )
        # Query-simplification shortcut: if there is only one collection, a
        # find-first search is just a regular result subquery. Same if there
        # are no collections.
        if len(collections) <= 1:
            return base
        # We filter the dimension keys in the given relation through
        # DimensionGraph.required.names to minimize the set we partition on
        # and order it in a more index-friendly way. More precisely, any
        # index we define on dimensions will be consistent with this order, but
        # any particular index may not have the same dimension columns.
        dimensions = self.universe.extract(
            [tag.dimension for tag in DimensionKeyColumnTag.filter_from(base.columns)]
        )
        find_first = FindFirstDataset(
            dimensions=DimensionKeyColumnTag.generate(dimensions.required.names),
            rank=DatasetColumnTag(dataset_type.name, "rank"),
        )
        return find_first.apply(
            base, preferred_engine=context.preferred_engine, require_preferred_engine=True
        ).with_only_columns(base.columns - {find_first.rank})
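
    # Illustrative sketch (not part of the original source; all names are
    # hypothetical): a find-first search keeps only the result from the
    # earliest collection in the search path for each data ID.
    #
    #     relation = backend.make_dataset_search_relation(
    #         calexp_type,
    #         records,  # ordered search path, e.g. user RUN first, release RUN second
    #         {"dataset_id", "run"},
    #         context,
    #     )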

    def make_doomed_dataset_relation(
        self,
        dataset_type: DatasetType,
        columns: Set[str],
        messages: Iterable[str],
        context: _C,
    ) -> Relation:
        """Construct a relation that represents a doomed query for datasets.

        Parameters
        ----------
        dataset_type : `DatasetType`
            Dataset type being queried.
        columns : `~collections.abc.Set` [ `str` ]
            Dataset columns to include (dimension key columns are always
            included). See `make_dataset_query_relation` for allowed values.
        messages : `~collections.abc.Iterable` [ `str` ]
            Diagnostic messages that explain why the query is doomed to yield
            no rows.
        context : `QueryContext`
            Context that manages per-query state.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation with the requested columns and no rows.
        """
        column_tags: set[ColumnTag] = set(
            DimensionKeyColumnTag.generate(dataset_type.dimensions.required.names)
        )
        column_tags.update(DatasetColumnTag.generate(dataset_type.name, columns))
        return context.preferred_engine.make_doomed_relation(columns=column_tags, messages=list(messages))
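
    # Illustrative sketch (not part of the original source; names are
    # hypothetical): a doomed relation carries the rejection diagnostics
    # gathered earlier so the caller can explain an empty result.
    #
    #     if not records:
    #         return backend.make_doomed_dataset_relation(
    #             bias_type, {"dataset_id", "run"}, rejections, context
    #         )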

    @abstractmethod
    def make_dimension_relation(
        self,
        dimensions: DimensionGraph,
        columns: Set[ColumnTag],
        context: _C,
        *,
        initial_relation: Relation | None = None,
        initial_join_max_columns: frozenset[ColumnTag] | None = None,
        initial_dimension_relationships: Set[frozenset[str]] | None = None,
        spatial_joins: Iterable[tuple[str, str]] = (),
        governor_constraints: Mapping[str, Set[str]],
    ) -> Relation:
        """Construct a relation that provides columns and constraints from
        dimension records.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions to include. The key columns for all dimensions (both
            required and implied) will be included in the returned relation.
        columns : `~collections.abc.Set` [ `ColumnTag` ]
            Dimension record columns to include. This set may include key
            column tags as well, though these may be ignored; the set of key
            columns to include is determined by the ``dimensions`` argument
            instead.
        context : `QueryContext`
            Context that manages per-query state.
        initial_relation : `~lsst.daf.relation.Relation`, optional
            Initial relation to join to the dimension relations. If this
            relation provides record columns, key columns, and relationships
            between key columns (see ``initial_dimension_relationships`` below)
            that would otherwise have been added by joining in a dimension
            element's relation, that relation may not be joined in at all.
        initial_join_max_columns : `frozenset` [ `ColumnTag` ], optional
            Maximum superset of common columns for joins to
            ``initial_relation`` (i.e. columns in the ``ON`` expression of SQL
            ``JOIN`` clauses). If provided, this is a subset of the dimension
            key columns in ``initial_relation``, which are otherwise all
            considered as potential common columns for joins. Ignored if
            ``initial_relation`` is not provided.
        initial_dimension_relationships : `~collections.abc.Set` \
            [ `frozenset` [ `str` ] ], optional
            A set of sets of dimension names representing relationships between
            dimensions encoded in the rows of ``initial_relation``. If not
            provided (and ``initial_relation`` is),
            `extract_dimension_relationships` will be called on
            ``initial_relation``.
        spatial_joins : `collections.abc.Iterable` [ `tuple` [ `str`, `str` ] ]
            Iterable of dimension element name pairs that should be spatially
            joined.
        governor_constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ], optional
            Constraints on governor dimensions that are provided by other parts
            of the query that either have been included in ``initial_relation``
            or are guaranteed to be added in the future. This is a mapping from
            governor dimension name to sets of values that dimension may take.

        Returns
        -------
        relation : `lsst.daf.relation.Relation`
            Relation containing the given dimension columns and constraints.
        """
        raise NotImplementedError()
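
    # Illustrative sketch (not part of the original source; the column tags,
    # spatial-join pair, and constraint values are hypothetical): join
    # dimension tables to an existing dataset relation, adding a spatial join
    # between two dimension elements.
    #
    #     relation = backend.make_dimension_relation(
    #         dimensions,
    #         columns=record_column_tags,
    #         context=context,
    #         initial_relation=dataset_relation,
    #         spatial_joins=[("visit_detector_region", "patch")],
    #         governor_constraints={"instrument": {"HSC"}},
    #     )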

    @abstractmethod
    def resolve_governor_constraints(
        self, dimensions: DimensionGraph, constraints: Mapping[str, Set[str]], context: _C
    ) -> Mapping[str, Set[str]]:
        """Resolve governor dimension constraints provided by user input to
        a query against the content in the `Registry`.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions that bound the governor dimensions to consider (via
            ``dimensions.governors``, more specifically).
        constraints : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ]
            Constraints from user input to the query (e.g. from data IDs and
            string expression predicates).
        context : `QueryContext`
            Object that manages state for the query; used here to fetch the
            governor dimension record cache if it has not already been loaded.

        Returns
        -------
        resolved : `~collections.abc.Mapping` [ `str`, \
            `~collections.abc.Set` [ `str` ] ]
            A shallow copy of ``constraints`` with keys equal to
            ``dimensions.governors.names`` and value sets constrained by the
            Registry content if they were not already in ``constraints``.

        Raises
        ------
        DataIdValueError
            Raised if ``constraints`` includes governor dimension values that
            are not present in the `Registry`.
        """
        raise NotImplementedError()
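
    # Illustrative sketch (not part of the original source; the constraint
    # values are hypothetical): user-supplied constraints are validated and
    # completed against what the registry actually contains.
    #
    #     governor_constraints = backend.resolve_governor_constraints(
    #         dimensions, {"instrument": {"HSC"}}, context
    #     )
    #     # Raises DataIdValueError if "HSC" is not a registered instrument.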

    @abstractmethod
    def get_dimension_record_cache(
        self, element_name: str, context: _C
    ) -> Mapping[DataCoordinate, DimensionRecord] | None:
        """Return a local cache of all `DimensionRecord` objects for a
        dimension element, fetching it if necessary.

        Parameters
        ----------
        element_name : `str`
            Name of the dimension element.
        context : `.queries.SqlQueryContext`
            Context to be used to execute queries when no cached result is
            available.

        Returns
        -------
        cache : `~collections.abc.Mapping` [ `DataCoordinate`, \
            `DimensionRecord` ] or `None`
            Mapping from data ID to dimension record, or `None` if this
            element's records are never cached.
        """
        raise NotImplementedError()
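
    # Illustrative sketch (not part of the original source; names are
    # hypothetical): governor-dimension records are typically small enough to
    # cache wholesale, keyed by their data IDs.
    #
    #     cache = backend.get_dimension_record_cache("instrument", context)
    #     if cache is not None:
    #         record = cache[instrument_data_id]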

    def extract_dimension_relationships(self, relation: Relation) -> set[frozenset[str]]:
        """Extract the dimension key relationships encoded in a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Relation tree to process.

        Returns
        -------
        relationships : `set` [ `frozenset` [ `str` ] ]
            Set of sets of dimension names, where each inner set represents a
            relationship between dimensions.

        Notes
        -----
        Dimension relationships include both many-to-one implied dependencies
        and many-to-many joins backed by "always-join" dimension elements, and
        it's important to join in the dimension table that defines a
        relationship in any query involving dimensions that are a superset of
        that relationship. For example, let's consider a relation tree that
        joins dataset existence-check relations for two dataset types, with
        dimensions ``{instrument, exposure, detector}`` and ``{instrument,
        physical_filter}``. The joined relation appears to have all dimension
        keys in its expanded graph present except ``band``, and the system
        could easily correct this by joining that dimension in directly. But
        it's also missing the ``{instrument, exposure, physical_filter}``
        relationship we'd get from the ``exposure`` dimension's own relation
        (``exposure`` implies ``physical_filter``) and the similar
        ``{instrument, physical_filter, band}`` relationship from the
        ``physical_filter`` dimension relation; we need the relationship logic
        to recognize that those dimensions need to be joined in as well in
        order for the full relation to have rows that represent valid data IDs.

        The implementation of this method relies on the assumption that
        `LeafRelation` objects always have rows that are consistent with all
        defined relationships (i.e. are valid data IDs). This is true not just
        for dimension relations themselves, but for anything created from
        queries based on them, including datasets and query results. It is
        possible to construct `LeafRelation` objects that don't satisfy this
        criterion (e.g. when accepting user-provided data IDs), and in this
        case higher-level guards or warnings must be provided.
        """
        return {
            frozenset(
                tag.dimension
                for tag in DimensionKeyColumnTag.filter_from(leaf_relation.columns & relation.columns)
            )
            for leaf_relation in self._extract_leaf_relations(relation).values()
        }
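
    # Illustrative sketch (not part of the original source; ``joined_relation``
    # is hypothetical): for a join of the two existence-check relations in the
    # docstring's example, the extracted relationships would include
    #
    #     backend.extract_dimension_relationships(joined_relation)
    #     # {frozenset({"instrument", "exposure", "detector"}),
    #     #  frozenset({"instrument", "physical_filter"})}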

    def _extract_leaf_relations(self, relation: Relation) -> dict[str, LeafRelation]:
        """Recursively extract leaf relations from a relation tree.

        Parameters
        ----------
        relation : `Relation`
            Tree to process.

        Returns
        -------
        leaves : `dict` [ `str`, `LeafRelation` ]
            Leaf relations, keyed and deduplicated by name.
        """
        match relation:
            case LeafRelation() as leaf:
                return {leaf.name: leaf}
            case UnaryOperationRelation(target=target):
                return self._extract_leaf_relations(target)
            case BinaryOperationRelation(lhs=lhs, rhs=rhs):
                return self._extract_leaf_relations(lhs) | self._extract_leaf_relations(rhs)
            case MarkerRelation(target=target):
                return self._extract_leaf_relations(target)
        raise AssertionError("Match should be exhaustive and all branches should return.")