Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 9%
88 statements
« prev ^ index » next coverage.py v6.4.1, created at 2022-07-03 01:08 -0700
« prev ^ index » next coverage.py v6.4.1, created at 2022-07-03 01:08 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25import enum
26from typing import Optional, Tuple
28from ....core import Dimension, DimensionElement, DimensionGraph, DimensionUniverse
class ExpressionConstant(enum.Enum):
    """Global constants that any parsed expression may refer to by name.

    Each member's value is the lower-case identifier string that selects it.
    """

    # Selected by the identifier ``null``.
    NULL = "null"

    # Selected by the identifier ``ingest_date``.
    INGEST_DATE = "ingest_date"
def categorizeConstant(name: str) -> Optional[ExpressionConstant]:
    """Map an expression identifier onto one of the global constants.

    Parameters
    ----------
    name : `str`
        Identifier to look up.  The comparison ignores case because the
        identifier is lower-cased before matching.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The enumeration member whose value equals the lower-cased
        identifier, or `None` when no member matches.
    """
    lowered = name.lower()
    # Scan the (small) enumeration for a member with a matching value;
    # fall through to `None` when the identifier is not a known constant.
    return next((constant for constant in ExpressionConstant if constant.value == lowered), None)
def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is either a bare name (meaning the primary key of that
    dimension) or ``<element>.<column>``, split at the first dot.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Container of all known dimensions, indexed by element name.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The element looked up in ``universe`` for the part of ``name``
        before the first dot (or all of ``name`` if there is no dot).
    column : `str` or `None`
        Column name within ``element``, or `None` when the identifier
        designates the primary key of a `Dimension`.

    Raises
    ------
    LookupError
        Raised if ``universe`` has no entry for the element name.
    RuntimeError
        Raised if the column part names a dimension in the element's graph
        (e.g. ``patch.tract``).  The message suggests how to rewrite the
        expression, so it should be propagated to where a human can read it.
    """
    element_name, _, column = name.partition(".")

    if not column:
        # Unqualified identifier (or nothing after the dot): it designates
        # the primary key of the named dimension.
        try:
            return universe[element_name], None
        except KeyError as err:
            raise LookupError(f"No dimension with name '{element_name}'.") from err

    try:
        element = universe[element_name]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{element_name}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # "visit.id = x" is accepted as a more explicit spelling of
        # "visit = x".
        return element, None

    if column in element.graph.names:
        # Something like "patch.tract = x" or "tract.tract = x": the column
        # part is itself a dimension name.  That is confusing at best and
        # may not be a real column, so refuse it and tell the user which
        # spellings to use instead.
        key_name = universe[column].primaryKey.name  # type: ignore
        raise RuntimeError(
            f"Invalid reference to '{element_name}.{column}' "
            f"in expression; please use '{column}' or "
            f"'{column}.{key_name}' instead."
        )

    return element, column
def categorizeOrderByName(graph: DimensionGraph, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Resolve an ORDER BY identifier against the elements of a graph.

    Parameters
    ----------
    graph : `DimensionGraph`
        Dimensions (and their elements) in which to look the name up.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The element the identifier refers to.
    column : `str` or `None`
        Name of the field within ``element``, or `None` when the identifier
        designates the element itself or the primary key of a `Dimension`.

    Raises
    ------
    ValueError
        Raised if the element name is not in the graph, if the field name
        is not recognized, or if an unqualified field name matches more
        than one element.

    Notes
    -----
    The rules differ slightly from those of `categorizeElementId`:

    - ``timespan.begin`` and ``timespan.end`` without an element prefix
      select the single temporal element of the graph; it is an error if
      there is none or more than one.
    - A name without a dot is first tried as an element name, e.g.
      ``visit``; otherwise it is searched among the metadata of all
      elements and the unique keys of all dimensions, e.g. ``day_obs``,
      and must match exactly once.
    - A name with a dot is ``<element>.<field>``, e.g.
      ``detector.full_name``; the field may be ``timespan.begin`` or
      ``timespan.end`` (temporal elements only), the primary key, a
      metadata name, or an alternate key.
    """
    timespan_bounds = ("timespan.begin", "timespan.end")

    if name in timespan_bounds:
        # Bare timespan bound: only unambiguous when exactly one element
        # of the graph is temporal.
        temporal_elements = [candidate for candidate in graph.elements if candidate.temporal]
        if not temporal_elements:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(temporal_elements) > 1:
            raise ValueError(
                f"Timespan exists in more than one dimesion element: {temporal_elements},"
                " qualify timespan with specific dimension name."
            )
        return temporal_elements[0], name

    prefix, dot, suffix = name.partition(".")

    if not dot:
        # Unqualified: an element name takes precedence over a field name.
        if name in graph.elements.names:
            return graph.elements[name], None
        # Otherwise collect every element carrying this name as metadata,
        # then every dimension carrying it as a unique key.
        owners = [candidate for candidate in graph.elements if name in candidate.metadata.names]
        owners.extend(dimension for dimension in graph if name in dimension.uniqueKeys.names)
        if not owners:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(owners) > 1:
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element: {owners},"
                " qualify metadata name with dimension name."
            )
        return owners[0], name

    # Qualified: "<element>.<field>", split at the first dot.
    if prefix not in graph.elements.names:
        raise ValueError(f"Unknown dimension element name '{prefix}'")
    element = graph.elements[prefix]

    if suffix in timespan_bounds:
        if not element.temporal:
            raise ValueError(f"Cannot use '{suffix}' with non-temporal element '{element}'.")
        return element, suffix

    is_dimension = isinstance(element, Dimension)
    if is_dimension and suffix == element.primaryKey.name:
        # Spelling out the primary key is optional; report it as `None`.
        return element, None

    recognized = suffix in element.metadata.names or (
        is_dimension and suffix in element.alternateKeys.names
    )
    if not recognized:
        raise ValueError(f"Field '{suffix}' does not exist in '{element}'.")
    return element, suffix
def categorizeElementOrderByName(element: DimensionElement, name: str) -> Optional[str]:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element the identifier must refer to.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if
        ``element`` is a `Dimension` and the requested column is its primary
        key (given either as the bare element name or as
        ``<element>.<primary key>``).

    Raises
    ------
    ValueError
        Raised if the name is not recognized: a timespan bound is used with
        a non-temporal element, the bare element name is used with an
        element that is not a `Dimension`, a qualified name has a prefix
        other than ``element.name``, or the field does not exist in
        ``element``.

    Notes
    -----
    For ORDER BY identifiers we use a slightly different set of rules
    compared to the rules in `categorizeElementId`:

    - Name can be the name of ``element`` itself, e.g. ``visit``; the
      element must then be a `Dimension`.
    - Name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``; the element name
      must correspond to the ``element`` argument.
    - Name can be a metadata name (or unique key name) without an element
      name prefix, e.g. ``day_obs``.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements, with or without an element name prefix.
    """
    timespan_bounds = ("timespan.begin", "timespan.end")
    field_name: Optional[str] = None

    if name in timespan_bounds:
        if element.temporal:
            field_name = name
        else:
            # Report the identifier that was actually passed in:
            # ``field_name`` is still `None` on this path, so formatting it
            # would produce the misleading "Cannot use 'None' ...".
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
    elif "." not in name:
        # No dot: either the name of this element or one of its fields.
        if name == element.name:
            # The bare element name stands for the primary key, which only
            # a dimension has; ``field_name`` stays `None`.
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
        else:
            # Can be a metadata name or any of the unique keys.
            if name in element.metadata.names or (
                isinstance(element, Dimension) and name in element.uniqueKeys.names
            ):
                field_name = name
            else:
                raise ValueError(f"Field '{name}' does not exist in '{element}'.")
    else:
        # Qualified name: "<element>.<field>", split at the first dot, and
        # the prefix must name this very element.
        elem_name, _, field_name = name.partition(".")
        if elem_name != element.name:
            raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'")
        if field_name in timespan_bounds:
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Spelling out the primary key is optional; report it as `None`.
            field_name = None
        else:
            if not (
                field_name in element.metadata.names
                or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)
            ):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return field_name