Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 7%
102 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-05 01:26 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25import enum
26from typing import cast
28from ....core import Dimension, DimensionElement, DimensionGraph, DimensionUniverse
class ExpressionConstant(enum.Enum):
    """Enumeration for constants recognized in all expressions.

    Each member's value is the lowercase spelling of the identifier, which is
    what `categorizeConstant` looks up after lowercasing its input.
    """

    # The ``null`` identifier.
    NULL = "null"
    # The ``ingest_date`` identifier.
    INGEST_DATE = "ingest_date"
def categorizeConstant(name: str) -> ExpressionConstant | None:
    """Categorize an identifier in a parsed expression as one of a few global
    constants.

    Parameters
    ----------
    name : `str`
        Identifier to categorize.  Case-insensitive.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        Enumeration value if the string represents a constant, `None`
        otherwise.
    """
    try:
        # Enum lookup by value; the identifier is lowercased first so that
        # matching is case-insensitive.
        return ExpressionConstant(name.lower())
    except ValueError:
        # Not one of the recognized constants.
        return None
def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[DimensionElement, str | None]:
    """Categorize an identifier in a parsed expression as either a `Dimension`
    name (indicating the primary key for that dimension) or a non-primary-key
    column in a `DimensionElement` table.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the identifier refers to a nonexistent `DimensionElement`
        or column.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the expression,
        so at least its message should generally be propagated up to a context
        where the error can be interpreted by a human.
    """
    # Split on the first dot only, so "visit.timespan.begin" yields the table
    # "visit" and the column "timespan.begin".
    table, _, column = name.partition(".")
    if column:
        try:
            element = universe[table]
        except KeyError:
            if table in ("timespan", "datetime", "timestamp"):
                # The user wrote something like "timespan.begin" without
                # saying which element it belongs to.
                raise LookupError(
                    "Dimension element name cannot be inferred in this context; "
                    f"use <dimension>.timespan.{column} instead."
                ) from None
            raise LookupError(f"No dimension element with name {table!r} in {name!r}.") from None
        if isinstance(element, Dimension) and column == element.primaryKey.name:
            # Allow e.g. "visit.id = x" instead of just "visit = x"; this
            # can be clearer.
            return element, None
        elif column in element.graph.names:
            # User said something like "patch.tract = x" or
            # "tract.tract = x" instead of just "tract = x" or
            # "tract.id = x", which is at least needlessly confusing and
            # possibly not actually a column name, though we can guess
            # what they were trying to do.
            # Encourage them to clean that up and try again.
            # (Use a dedicated local rather than rebinding the ``name``
            # parameter.)
            primary_key_name = universe[column].primaryKey.name  # type: ignore
            raise RuntimeError(
                f"Invalid reference to '{table}.{column}' "
                f"in expression; please use '{column}' or "
                f"'{column}.{primary_key_name}' instead."
            )
        else:
            return element, column
    else:
        # No dot (or nothing after it): the identifier must name a dimension.
        try:
            dimension = universe[table]
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err
        return dimension, None
def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionElement, str | None]:
    """Categorize an identifier in an ORDER BY clause.

    Parameters
    ----------
    graph : `DimensionGraph`
        All known dimensions.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    ValueError
        Raised if element name is not found in a graph, metadata name is not
        recognized, or if more than one element has the specified metadata.

    Notes
    -----
    For ORDER BY identifiers we use slightly different set of rules compared to
    the rules in `categorizeElementId`:

    - Name can be a dimension element name. e.g. ``visit``.
    - Name can be an element name and a metadata name (or key name) separated
      by dot, e.g. ``detector.full_name``.
    - Name can be a metadata name without element name prefix, e.g.
      ``day_obs``; in that case metadata (or key) is searched in all elements
      present in a graph. Exception is raised if name appears in more than one
      element.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements, if element name is not given then a temporal
      element from a graph is used.
    """
    element: DimensionElement
    field_name: str | None = None
    if name in ("timespan.begin", "timespan.end"):
        # Unqualified timespan reference: it is only unambiguous when exactly
        # one element in the graph is temporal.
        matches = [elem for elem in graph.elements if elem.temporal]
        if len(matches) == 1:
            element = matches[0]
            field_name = name
        elif len(matches) > 1:
            raise ValueError(
                "Timespan exists in more than one dimension element "
                f"({', '.join(elem.name for elem in matches)}); "
                "qualify timespan with specific dimension name."
            )
        else:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
    elif "." not in name:
        # No dot, can be either a dimension name or a field name (in any of
        # the known elements)
        if name in graph.elements.names:
            element = graph.elements[name]
        else:
            # Can be a metadata name or any of unique keys.  The boolean in
            # each pair records whether the match came from a dimension key
            # (True) or from element metadata (False).
            match_pairs: list[tuple[DimensionElement, bool]] = [
                (elem, False) for elem in graph.elements if name in elem.metadata.names
            ]
            match_pairs += [(dim, True) for dim in graph if name in dim.uniqueKeys.names]
            if len(match_pairs) == 1:
                element, is_dimension_key = match_pairs[0]
                if is_dimension_key and name == cast(Dimension, element).primaryKey.name:
                    # Need to treat reference to primary key field as a
                    # reference to the dimension name.
                    return element, None
                field_name = name
            elif len(match_pairs) > 1:
                raise ValueError(
                    f"Metadata '{name}' exists in more than one dimension element "
                    f"({', '.join(elem.name for elem, _ in match_pairs)}); "
                    "qualify field name with dimension name."
                )
            else:
                raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
    else:
        # qualified name, must be a dimension element and a field; split on
        # the first dot only so "visit.timespan.begin" keeps "timespan.begin"
        # as the field.
        elem_name, _, field_name = name.partition(".")
        if elem_name not in graph.elements.names:
            if field_name in ("begin", "end"):
                raise ValueError(
                    f"Unknown dimension element {elem_name!r}; perhaps you meant 'timespan.{field_name}'?"
                )
            raise ValueError(f"Unknown dimension element {elem_name!r}.")
        element = graph.elements[elem_name]
        if field_name in ("timespan.begin", "timespan.end"):
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Primary key is optional
            field_name = None
        else:
            if not (
                field_name in element.metadata.names
                or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)
            ):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return element, field_name
def categorizeElementOrderByName(element: DimensionElement, name: str) -> str | None:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    ValueError
        Raised if name is not recognized.

    Notes
    -----
    For ORDER BY identifiers we use slightly different set of rules compared to
    the rules in `categorizeElementId`:

    - Name can be a dimension element name. e.g. ``visit``.
    - Name can be an element name and a metadata name (or key name) separated
      by dot, e.g. ``detector.full_name``, element name must correspond to
      ``element`` argument
    - Name can be a metadata name without element name prefix, e.g.
      ``day_obs``.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    field_name: str | None = None
    if name in ("timespan.begin", "timespan.end"):
        if element.temporal:
            field_name = name
        else:
            # Report the identifier that was passed in: ``field_name`` is
            # still `None` on this path, so interpolating it would produce
            # the misleading message "Cannot use 'None' ...".
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
    elif "." not in name:
        # No dot, can be either the element's own name or one of its fields.
        if name == element.name:
            # Must be a dimension element
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
            # ``field_name`` stays `None`, i.e. a reference to the dimension
            # itself.
        else:
            # Can be a metadata name or any of the keys, but primary key needs
            # to be treated the same as a reference to the dimension name
            # itself.
            if isinstance(element, Dimension):
                if name == element.primaryKey.name:
                    return None
                elif name in element.uniqueKeys.names:
                    return name
            if name in element.metadata.names:
                return name
            raise ValueError(f"Field '{name}' does not exist in '{element}'.")
    else:
        # qualified name, must be a dimension element and a field; split on
        # the first dot only so "visit.timespan.begin" keeps "timespan.begin"
        # as the field.
        elem_name, _, field_name = name.partition(".")
        if elem_name != element.name:
            if field_name in ("begin", "end"):
                extra = f"; perhaps you meant 'timespan.{field_name}'?"
            else:
                extra = "."
            raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")
        if field_name in ("timespan.begin", "timespan.end"):
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Primary key is optional
            field_name = None
        else:
            if not (
                field_name in element.metadata.names
                or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)
            ):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return field_name