Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 7%
95 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-02 02:16 -0700
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-02 02:16 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25import enum
26from typing import Optional, Tuple, cast
28from ....core import Dimension, DimensionElement, DimensionGraph, DimensionUniverse
class ExpressionConstant(enum.Enum):
    """Global constants that any parsed expression may refer to.

    Each member's value is the lower-case spelling of the identifier as it
    is matched in an expression; `categorizeConstant` lower-cases its input
    before looking it up here.
    """

    # Literal identifier ``null``.
    NULL = "null"
    # Literal identifier ``ingest_date``.
    INGEST_DATE = "ingest_date"
def categorizeConstant(name: str) -> Optional[ExpressionConstant]:
    """Map an expression identifier onto one of the global constants.

    Parameters
    ----------
    name : `str`
        Identifier taken from a parsed expression.  Matching ignores case
        because the identifier is lower-cased before the lookup.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The matching enumeration member, or `None` when ``name`` is not the
        value of any `ExpressionConstant` member.
    """
    lowered = name.lower()
    constant: Optional[ExpressionConstant]
    try:
        # Enum lookup by value raises ValueError for unknown values.
        constant = ExpressionConstant(lowered)
    except ValueError:
        constant = None
    return constant
def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is either a bare name (taken to mean the primary key of
    that dimension) or ``<element>.<column>``, which names a column in the
    table of a `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    LookupError
        Raised if the part of the identifier before the first dot is not a
        name known to ``universe``.
    RuntimeError
        Raised if the expression refers to a primary key in an illegal way.
        This exception includes a suggestion for how to rewrite the
        expression, so at least its message should generally be propagated
        up to a context where the error can be interpreted by a human.
    """
    # Only the first dot splits the identifier; anything after it is treated
    # as the column part.
    element_name, _, column_name = name.partition(".")

    if not column_name:
        # Bare identifier: look the name up directly in the universe.
        try:
            return universe[element_name], None
        except KeyError as err:
            raise LookupError(f"No dimension with name '{element_name}'.") from err

    try:
        element = universe[element_name]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{element_name}'.") from err

    if isinstance(element, Dimension) and column_name == element.primaryKey.name:
        # "visit.id = x" is accepted as a clearer spelling of "visit = x".
        return element, None

    if column_name not in element.graph.names:
        # Ordinary (non-dimension) column of the element's table.
        return element, column_name

    # Something like "patch.tract = x" or "tract.tract = x" was written
    # instead of "tract = x" or "tract.id = x".  That is needlessly
    # confusing and possibly not a real column name, but the intent can be
    # guessed, so point the user at the accepted spellings.
    key_name = universe[column_name].primaryKey.name  # type: ignore
    raise RuntimeError(
        f"Invalid reference to '{element_name}.{column_name}' "
        f"in expression; please use '{column_name}' or "
        f"'{column_name}.{key_name}' instead."
    )
def categorizeOrderByName(graph: DimensionGraph, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Resolve an ORDER BY identifier against the elements of a graph.

    Parameters
    ----------
    graph : `DimensionGraph`
        Dimensions (and their elements) that the identifier may refer to.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key.

    Raises
    ------
    ValueError
        Raised if the element name is not found in the graph, if the field
        name is not recognized, or if an unqualified field name matches
        more than one element.

    Notes
    -----
    The rules for ORDER BY identifiers differ slightly from those used by
    `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``.
    - The name can be a metadata name without an element prefix, e.g.
      ``day_obs``; the metadata (or key) is then searched for in all
      elements present in the graph, and an exception is raised if it
      appears in more than one of them.
    - The two special identifiers ``timespan.begin`` and ``timespan.end``
      can be used with temporal elements; if no element name is given, the
      single temporal element of the graph is used.
    """
    timespan_fields = ("timespan.begin", "timespan.end")

    # Unqualified timespan bound: the graph must hold exactly one temporal
    # element for the reference to be unambiguous.
    if name in timespan_fields:
        temporal_elements = [candidate for candidate in graph.elements if candidate.temporal]
        if not temporal_elements:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(temporal_elements) > 1:
            raise ValueError(
                f"Timespan exists in more than one dimesion element: {temporal_elements},"
                " qualify timespan with specific dimension name."
            )
        return temporal_elements[0], name

    # No dot: either an element name or a field name of some element.
    if "." not in name:
        if name in graph.elements.names:
            return graph.elements[name], None

        # Collect every place the field could live.  The boolean records
        # whether the match came from a dimension's unique keys (True) or
        # from an element's metadata (False).
        candidates: list[tuple[DimensionElement, bool]] = []
        for candidate in graph.elements:
            if name in candidate.metadata.names:
                candidates.append((candidate, False))
        for dimension in graph:
            if name in dimension.uniqueKeys.names:
                candidates.append((dimension, True))

        if not candidates:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(candidates) > 1:
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element: "
                f"{[found for found, _ in candidates]}, qualify metadata name with dimension name."
            )

        owner, from_unique_key = candidates[0]
        if from_unique_key and name == cast(Dimension, owner).primaryKey.name:
            # A primary key field is reported the same way as a reference
            # to the dimension itself.
            return owner, None
        return owner, name

    # Qualified name: "<element>.<field>", split on the first dot only so
    # that "<element>.timespan.begin" keeps "timespan.begin" as the field.
    owner_name, _, column = name.partition(".")
    if owner_name not in graph.elements.names:
        raise ValueError(f"Unknown dimension element name '{owner_name}'")
    owner = graph.elements[owner_name]

    if column in timespan_fields:
        if not owner.temporal:
            raise ValueError(f"Cannot use '{column}' with non-temporal element '{owner}'.")
        return owner, column

    if isinstance(owner, Dimension) and column == owner.primaryKey.name:
        # Spelling out the primary key is optional.
        return owner, None

    if column not in owner.metadata.names and not (
        isinstance(owner, Dimension) and column in owner.alternateKeys.names
    ):
        raise ValueError(f"Field '{column}' does not exist in '{owner}'.")
    return owner, column
def categorizeElementOrderByName(element: DimensionElement, name: str) -> Optional[str]:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the identifier refers to its
        primary key (either by the dimension's own name or by the key's
        name).

    Raises
    ------
    ValueError
        Raised if name is not recognized: a timespan bound is requested for
        a non-temporal element, the element prefix does not match
        ``element``, the bare element name is used for an element that is
        not a `Dimension`, or the field does not exist in ``element``.

    Notes
    -----
    For ORDER BY identifiers we use a slightly different set of rules
    compared to the rules in `categorizeElementId`:

    - Name can be a dimension element name, e.g. ``visit``.
    - Name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``; the element name must
      correspond to the ``element`` argument.
    - Name can be a metadata name without element name prefix, e.g.
      ``day_obs``.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    field_name: Optional[str] = None
    if name in ("timespan.begin", "timespan.end"):
        if element.temporal:
            field_name = name
        else:
            # Report the identifier the caller supplied; ``field_name`` is
            # still `None` on this path and must not be used in the message.
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
    elif "." not in name:
        # No dot, can be either the element's own name or one of its fields.
        if name == element.name:
            # Referring to the element by name means its primary key, which
            # only a dimension has.
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
        else:
            # Can be a metadata name or any of the keys, but primary key needs
            # to be treated the same as a reference to the dimension name
            # itself.
            if isinstance(element, Dimension):
                if name == element.primaryKey.name:
                    return None
                elif name in element.uniqueKeys.names:
                    return name
            if name in element.metadata.names:
                return name
            raise ValueError(f"Field '{name}' does not exist in '{element}'.")
    else:
        # Qualified name, must be this element's name and a field.  Only the
        # first dot splits, so "<element>.timespan.begin" keeps
        # "timespan.begin" as the field.
        elem_name, _, field_name = name.partition(".")
        if elem_name != element.name:
            raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'")
        if field_name in ("timespan.begin", "timespan.end"):
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Primary key is optional
            field_name = None
        else:
            if not (
                field_name in element.metadata.names
                or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)
            ):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return field_name