Coverage for python/lsst/daf/butler/queries/_identifiers.py: 13%
80 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 02:51 -0700
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-08 02:51 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("interpret_identifier", "IdentifierContext")
32import itertools
33from collections.abc import Mapping, Set
34from typing import Any, cast
36from .._exceptions import InvalidQueryError
37from ..dimensions import Dimension, DimensionGroup
38from .tree import (
39 DATASET_FIELD_NAMES,
40 ColumnExpression,
41 DatasetFieldName,
42 DatasetFieldReference,
43 DimensionFieldReference,
44 DimensionKeyReference,
45 UnaryExpression,
46 make_column_literal,
47)
class IdentifierContext:  # numpydoc ignore=PR01
    """Context describing the query a string identifier appears in, used to
    resolve that identifier to a column or bind literal.
    """

    dimensions: DimensionGroup
    """Dimensions already present in the query this filter is being applied
    to. Returned expressions may reference dimensions outside this set.
    """

    datasets: Set[str]
    """Dataset types already present in the query this filter is being applied
    to. Returned expressions may reference datasets outside this set.
    """

    bind: Mapping[str, Any]
    """Dictionary of bind literals to match identifiers against first."""

    def __init__(
        self, dimensions: DimensionGroup, datasets: Set[str], bind: Mapping[str, Any] | None = None
    ) -> None:
        self.dimensions = dimensions
        self.datasets = datasets
        # Bind lookups are case-insensitive, so fold every key to lower case
        # up front.  If two keys collapse onto the same lowered name the
        # mapping shrinks, which we treat as a user error.
        lowered = {} if bind is None else {key.lower(): value for key, value in bind.items()}
        if bind is not None and len(lowered) != len(bind):
            raise ValueError(f"Duplicate keys present in bind: {bind.keys()}")
        self.bind = lowered
def interpret_identifier(context: IdentifierContext, identifier: str) -> ColumnExpression:
    """Associate an identifier in a ``where`` or ``order_by`` expression with
    a query column or bind literal.

    Parameters
    ----------
    context : `IdentifierContext`
        Information about the query where this identifier is used.
    identifier : `str`
        String identifier to process.

    Returns
    -------
    expression : `ColumnExpression`
        Column expression corresponding to the identifier.
    """
    universe = context.dimensions.universe
    # Identifiers are matched case-insensitively.
    identifier = identifier.lower()
    # Bind literals take precedence over everything else.
    if identifier in context.bind:
        return make_column_literal(context.bind[identifier])
    terms = identifier.split(".")
    n_terms = len(terms)
    if n_terms == 1:
        if identifier in universe.dimensions:
            return DimensionKeyReference.model_construct(dimension=universe.dimensions[identifier])
        # This is an unqualified reference to a field of a dimension
        # element or datasets; this is okay if it's unambiguous.
        element_hits = {
            name for name in context.dimensions.elements if identifier in universe[name].schema.names
        }
        dataset_hits = set(context.datasets) if identifier in DATASET_FIELD_NAMES else set()
        if len(element_hits) + len(dataset_hits) > 1:
            match_str = ", ".join(
                f"'{x}.{identifier}'" for x in sorted(itertools.chain(element_hits, dataset_hits))
            )
            raise InvalidQueryError(
                f"Ambiguous identifier {identifier!r} matches multiple fields: {match_str}."
            )
        if element_hits:
            return DimensionFieldReference.model_construct(
                element=universe[element_hits.pop()], field=identifier
            )
        if dataset_hits:
            return DatasetFieldReference.model_construct(
                dataset_type=dataset_hits.pop(), field=cast(DatasetFieldName, identifier)
            )
    elif n_terms == 2:
        first, second = terms
        if first in universe.elements.names:
            element = universe[first]
            if second in element.schema.dimensions.names:
                if isinstance(element, Dimension) and second == element.primary_key.name:
                    # Identifier is something like "visit.id" which we want
                    # to interpret the same way as just "visit".
                    return DimensionKeyReference.model_construct(dimension=element)
                # Identifier is something like "visit.instrument", which we
                # want to interpret the same way as just "instrument".
                return DimensionKeyReference.model_construct(dimension=universe.dimensions[second])
            if second in element.schema.remainder.names:
                return DimensionFieldReference.model_construct(element=element, field=second)
            raise InvalidQueryError(f"Unrecognized field {second!r} for {first}.")
        if second in DATASET_FIELD_NAMES:
            # We just assume the dataset type is okay; it's the job of
            # higher-level code to complain otherwise.
            return DatasetFieldReference.model_construct(
                dataset_type=first, field=cast(DatasetFieldName, second)
            )
        if first == "timespan":
            # "timespan.begin"/"timespan.end" wrap whatever "timespan"
            # alone resolves to in a bound-extraction expression.
            operand = interpret_identifier(context, "timespan")
            if second == "begin":
                return UnaryExpression(operand=operand, operator="begin_of")
            if second == "end":
                return UnaryExpression(operand=operand, operator="end_of")
        elif first in context.datasets:
            raise InvalidQueryError(
                f"Identifier {identifier!r} references dataset type {first!r} but field "
                f"{second!r} is not valid for datasets."
            )
    elif n_terms == 3:
        # Three terms can only be "<thing>.timespan.begin" or ".end":
        # resolve the first two terms, then extract the requested bound.
        operand = interpret_identifier(context, ".".join(terms[:2]))
        if terms[2] == "begin":
            return UnaryExpression(operand=operand, operator="begin_of")
        if terms[2] == "end":
            return UnaryExpression(operand=operand, operator="end_of")
    raise InvalidQueryError(f"Unrecognized identifier {identifier!r}.")