Coverage for python/lsst/daf/butler/queries/convert_args.py: 7%
95 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-04 02:55 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = (
31 "convert_where_args",
32 "convert_order_by_args",
33)
35import itertools
36from collections.abc import Mapping, Set
37from typing import Any, cast
39from ..dimensions import DataCoordinate, DataId, Dimension, DimensionGroup
40from .expression_factory import ExpressionFactory, ExpressionProxy
41from .tree import (
42 DATASET_FIELD_NAMES,
43 ColumnExpression,
44 DatasetFieldName,
45 DatasetFieldReference,
46 DimensionFieldReference,
47 DimensionKeyReference,
48 InvalidQueryError,
49 OrderExpression,
50 Predicate,
51 Reversed,
52 UnaryExpression,
53 make_column_literal,
54 validate_order_expression,
55)
def convert_where_args(
    dimensions: DimensionGroup,
    datasets: Set[str],
    *args: str | Predicate | DataId,
    bind: Mapping[str, Any] | None = None,
    **kwargs: Any,
) -> Predicate:
    """Standardize ``where`` arguments into a single predicate.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions already present in the query this filter is being applied
        to. Returned predicates may reference dimensions outside this set.
    datasets : `~collections.abc.Set` [ `str` ]
        Dataset types already present in the query this filter is being applied
        to. Returned predicates may reference datasets outside this set; this
        may be an error at a higher level, but it is not necessarily checked
        here.
    *args : `str`, `Predicate`, `DataCoordinate`, or `~collections.abc.Mapping`
        Expressions to convert into predicates.
    bind : `~collections.abc.Mapping`, optional
        Mapping from identifier to literal value used when parsing string
        expressions.
    **kwargs : `object`
        Additional data ID key-value pairs.

    Returns
    -------
    predicate : `Predicate`
        Standardized predicate object.

    Notes
    -----
    Data ID values are not checked for consistency; they are extracted from
    args and then kwargs and combined, with later extractions taking
    precedence.
    """
    # Start from the identity predicate and AND everything onto it.
    predicate = Predicate.from_bool(True)
    # Collect data ID key-value pairs from all positional args and kwargs;
    # later updates win, matching the precedence documented above.
    data_id: dict[str, Any] = {}
    for arg in args:
        if isinstance(arg, str):
            # String expression parsing is not wired up yet.
            raise NotImplementedError("TODO: plug in registry.queries.expressions.parser")
        elif isinstance(arg, Predicate):
            predicate = predicate.logical_and(arg)
        elif isinstance(arg, DataCoordinate):
            data_id.update(arg.mapping)
        else:
            # Assumed to be a plain mapping-like data ID.
            data_id.update(arg)
    data_id.update(kwargs)
    # Turn each data ID entry into an equality comparison on the dimension
    # key column and AND it into the result.
    universe_dimensions = dimensions.universe.dimensions
    for key, value in data_id.items():
        comparison = Predicate.compare(
            DimensionKeyReference.model_construct(dimension=universe_dimensions[key]),
            "==",
            make_column_literal(value),
        )
        predicate = predicate.logical_and(comparison)
    return predicate
def convert_order_by_args(
    dimensions: DimensionGroup, datasets: Set[str], *args: str | OrderExpression | ExpressionProxy
) -> tuple[OrderExpression, ...]:
    """Standardize ``order_by`` arguments into a tuple of column expressions.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions already present in the query whose rows are being sorted.
        Returned expressions may reference dimensions outside this set; this
        may be an error at a higher level, but it is not necessarily checked
        here.
    datasets : `~collections.abc.Set` [ `str` ]
        Dataset types already present in the query whose rows are being sorted.
        Returned expressions may reference datasets outside this set; this may
        be an error at a higher level, but it is not necessarily checked here.
    *args : `OrderExpression`, `str`, or `ExpressionProxy`
        Expression or column names to sort by.

    Returns
    -------
    expressions : `tuple` [ `OrderExpression`, ... ]
        Standardized expression objects.
    """
    expressions: list[OrderExpression] = []
    for item in args:
        if isinstance(item, str):
            # A leading "-" requests a descending sort on the named column.
            descending = item.startswith("-")
            name = item[1:] if descending else item
            expression: Any = interpret_identifier(dimensions, datasets, name, {})
            if descending:
                expression = Reversed(operand=expression)
        elif isinstance(item, ExpressionProxy):
            expression = ExpressionFactory.unwrap(item)
        else:
            expression = item
        # Anything without an expression_type attribute is not one of our
        # expression tree objects and cannot be validated.
        if not hasattr(expression, "expression_type"):
            raise TypeError(f"Unrecognized order-by argument: {expression!r}.")
        expressions.append(validate_order_expression(expression))
    return tuple(expressions)
def interpret_identifier(
    dimensions: DimensionGroup, datasets: Set[str], identifier: str, bind: Mapping[str, Any]
) -> ColumnExpression:
    """Associate an identifier in a ``where`` or ``order_by`` expression with
    a query column or bind literal.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions already present in the query this filter is being applied
        to. Returned expressions may reference dimensions outside this set.
    datasets : `~collections.abc.Set` [ `str` ]
        Dataset types already present in the query this filter is being applied
        to. Returned expressions may reference datasets outside this set.
    identifier : `str`
        String identifier to process.
    bind : `~collections.abc.Mapping` [ `str`, `object` ]
        Dictionary of bind literals to match identifiers against first.

    Returns
    -------
    expression : `ColumnExpression`
        Column expression corresponding to the identifier.

    Raises
    ------
    InvalidQueryError
        Raised if the identifier is ambiguous (an unqualified field name that
        matches more than one dimension element and/or dataset type) or cannot
        be resolved at all.
    """
    # Bind literals take precedence over any column interpretation.
    if identifier in bind:
        return make_column_literal(bind[identifier])
    terms = identifier.split(".")
    match len(terms):
        case 1:
            # A bare dimension name (e.g. "visit") is a dimension key.
            if identifier in dimensions.universe.dimensions:
                return DimensionKeyReference.model_construct(
                    dimension=dimensions.universe.dimensions[identifier]
                )
            # This is an unqualified reference to a field of a dimension
            # element or datasets; this is okay if it's unambiguous.
            element_matches: set[str] = set()
            for element_name in dimensions.elements:
                element = dimensions.universe[element_name]
                if identifier in element.schema.names:
                    element_matches.add(element_name)
            # Dataset fields (e.g. "ingest_date") match every dataset type
            # currently in the query, so >1 dataset type is also ambiguous.
            if identifier in DATASET_FIELD_NAMES:
                dataset_matches = set(datasets)
            else:
                dataset_matches = set()
            if len(element_matches) + len(dataset_matches) > 1:
                match_str = ", ".join(
                    f"'{x}.{identifier}'" for x in sorted(itertools.chain(element_matches, dataset_matches))
                )
                raise InvalidQueryError(
                    f"Ambiguous identifier {identifier!r} matches multiple fields: {match_str}."
                )
            elif element_matches:
                element = dimensions.universe[element_matches.pop()]
                return DimensionFieldReference.model_construct(element=element, field=identifier)
            elif dataset_matches:
                return DatasetFieldReference.model_construct(
                    dataset_type=dataset_matches.pop(), field=cast(DatasetFieldName, identifier)
                )
            # No match: fall through to the Unrecognized-identifier error below.
        case 2:
            first, second = terms
            if first in dimensions.universe.elements.names:
                element = dimensions.universe[first]
                if second in element.schema.dimensions.names:
                    if isinstance(element, Dimension) and second == element.primary_key.name:
                        # Identifier is something like "visit.id" which we want
                        # to interpret the same way as just "visit".
                        return DimensionKeyReference.model_construct(dimension=element)
                    else:
                        # Identifier is something like "visit.instrument",
                        # which we want to interpret the same way as just
                        # "instrument".
                        dimension = dimensions.universe.dimensions[second]
                        return DimensionKeyReference.model_construct(dimension=dimension)
                elif second in element.schema.remainder.names:
                    # A non-dimension field of the element (e.g. a region or
                    # timespan column).
                    return DimensionFieldReference.model_construct(element=element, field=second)
                else:
                    raise InvalidQueryError(f"Unrecognized field {second!r} for {first}.")
            elif second in DATASET_FIELD_NAMES:
                # We just assume the dataset type is okay; it's the job of
                # higher-level code to complain otherwise.
                return DatasetFieldReference.model_construct(
                    dataset_type=first, field=cast(DatasetFieldName, second)
                )
            if first == "timespan":
                # "timespan.begin"/"timespan.end": resolve the bare "timespan"
                # identifier recursively (so ambiguity checks apply), then wrap
                # it with the appropriate endpoint-extraction operator.
                base = interpret_identifier(dimensions, datasets, "timespan", bind)
                if second == "begin":
                    return UnaryExpression(operand=base, operator="begin_of")
                if second == "end":
                    return UnaryExpression(operand=base, operator="end_of")
            elif first in datasets:
                raise InvalidQueryError(
                    f"Identifier {identifier!r} references dataset type {first!r} but field "
                    f"{second!r} is not valid for datasets."
                )
        case 3:
            # Something like "visit.timespan.begin": resolve the first two
            # terms as a column reference, then apply the endpoint operator.
            base = interpret_identifier(dimensions, datasets, ".".join(terms[:2]), bind)
            if terms[2] == "begin":
                return UnaryExpression(operand=base, operator="begin_of")
            if terms[2] == "end":
                return UnaryExpression(operand=base, operator="end_of")
    raise InvalidQueryError(f"Unrecognized identifier {identifier!r}.")