Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 10%
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = () # all symbols intentionally private; for internal package use.
25import enum
26from typing import Optional, Tuple
28from ....core import Dimension, DimensionElement, DimensionGraph, DimensionUniverse
class ExpressionConstant(enum.Enum):
    """Global constants that are recognized in every expression.

    The value of each member is the lower-case identifier that denotes the
    constant inside an expression string.
    """

    # Identifier ``null``.
    NULL = "null"

    # Identifier ``ingest_date``.
    INGEST_DATE = "ingest_date"
def categorizeConstant(name: str) -> Optional[ExpressionConstant]:
    """Map an identifier from a parsed expression onto a global constant.

    Parameters
    ----------
    name : `str`
        Identifier to look up.  The comparison ignores case.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The matching enumeration member, or `None` when the identifier does
        not name any constant.
    """
    # Constant values are stored in lower case, so normalize once and compare
    # against each member's value.
    lowered = name.lower()
    return next((constant for constant in ExpressionConstant if constant.value == lowered), None)
def categorizeElementId(universe: DimensionUniverse, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is either a bare `Dimension` name (which stands for that
    dimension's primary key) or ``<element>.<column>``, naming a column in
    the table of a `DimensionElement`.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        Name of a column in the table for ``element``; `None` when the
        identifier denotes the primary key of a `Dimension`.

    Raises
    ------
    LookupError
        Raised if the element named by the identifier is not present in
        ``universe``.
    RuntimeError
        Raised if the identifier spells a primary-key reference in a
        disallowed way (e.g. ``patch.tract``).  The message suggests how to
        rewrite the expression, so it should be propagated to somewhere a
        human can read it.
    """
    table, _, column = name.partition(".")

    if not column:
        # Nothing after the (optional) dot: the identifier is a plain
        # dimension name and implies its primary key.
        try:
            return universe[table], None
        except KeyError as err:
            raise LookupError(f"No dimension with name '{table}'.") from err

    # Qualified identifier: "<element>.<column>".
    try:
        element = universe[table]
    except KeyError as err:
        raise LookupError(f"No dimension element with name '{table}'.") from err

    if isinstance(element, Dimension) and column == element.primaryKey.name:
        # Spelling out the primary key, e.g. "visit.id = x" rather than
        # "visit = x", is accepted because it can read more clearly.
        return element, None

    if column in element.graph.names:
        # Something like "patch.tract = x" or "tract.tract = x" was written
        # where "tract = x" or "tract.id = x" was meant.  That is confusing
        # at best and may not even be a real column, so rather than guess,
        # ask the user to rewrite it.
        raise RuntimeError(
            f"Invalid reference to '{table}.{column}' "  # type: ignore
            f"in expression; please use '{column}' or "
            f"'{column}.{universe[column].primaryKey.name}' instead."
        )

    return element, column
def categorizeOrderByName(graph: DimensionGraph, name: str) -> Tuple[DimensionElement, Optional[str]]:
    """Resolve an ORDER BY identifier to a dimension element and column.

    Parameters
    ----------
    graph : `DimensionGraph`
        Dimensions (and their elements) among which the identifier is
        resolved.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        Name of a column in the table for ``element``; `None` when the
        identifier is a bare element name or the explicitly qualified
        primary key of a `Dimension`.

    Raises
    ------
    ValueError
        Raised if the element name is not part of ``graph``, if the field
        name is not recognized, or if an unqualified field name matches more
        than one element.

    Notes
    -----
    The rules for ORDER BY identifiers differ slightly from those in
    `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``; it is then searched for in all elements of
      the graph and must match exactly one of them.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements; without an element prefix the graph must
      contain exactly one temporal element.
    """
    timespan_names = ("timespan.begin", "timespan.end")

    # Unqualified timespan bound: the graph has to supply exactly one
    # temporal element for it to be unambiguous.
    if name in timespan_names:
        matches = [candidate for candidate in graph.elements if candidate.temporal]
        if not matches:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(matches) > 1:
            raise ValueError(
                f"Timespan exists in more than one dimesion element: {matches},"
                " qualify timespan with specific dimension name."
            )
        return matches[0], name

    # No dot: either an element name or a field of some element.
    if "." not in name:
        if name in graph.elements.names:
            return graph.elements[name], None
        # Look for the name among metadata fields of all elements, then
        # among the unique keys of the graph's dimensions.
        matches = [elem for elem in graph.elements if name in elem.metadata.names]
        matches.extend(dim for dim in graph if name in dim.uniqueKeys.names)
        if not matches:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(matches) > 1:
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element: {matches},"
                " qualify metadata name with dimension name."
            )
        return matches[0], name

    # Qualified name: "<element>.<field>"; only the first dot separates, so
    # "<element>.timespan.begin" yields the field "timespan.begin".
    elem_name, _, field_name = name.partition(".")
    if elem_name not in graph.elements.names:
        raise ValueError(f"Unknown dimension element name '{elem_name}'")
    element = graph.elements[elem_name]

    if field_name in timespan_names:
        if not element.temporal:
            raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        return element, field_name

    is_dimension = isinstance(element, Dimension)
    if is_dimension and field_name == element.primaryKey.name:
        # An explicit primary key is the same as naming the dimension alone.
        return element, None

    if field_name not in element.metadata.names and not (
        is_dimension and field_name in element.alternateKeys.names
    ):
        raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")
    return element, field_name
def categorizeElementOrderByName(element: DimensionElement, name: str) -> Optional[str]:
    """Resolve an ORDER BY identifier against a single dimension element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element the identifier must refer to.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    column : `str` or `None`
        Name of a column in the table for ``element``; `None` when the
        identifier is the bare name of a `Dimension` or its explicitly
        qualified primary key.

    Raises
    ------
    ValueError
        Raised if the name is not recognized: a timespan bound used with a
        non-temporal element, a bare element name for an element that is not
        a `Dimension`, an element prefix that differs from ``element``, or a
        field that does not exist in ``element``.

    Notes
    -----
    The rules for ORDER BY identifiers differ slightly from those in
    `categorizeElementId`:

    - The name can be the dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``; the element name must
      correspond to the ``element`` argument.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    timespan_names = ("timespan.begin", "timespan.end")

    # Unqualified timespan bound.
    if name in timespan_names:
        if not element.temporal:
            # Interpolate ``name``: no field has been parsed at this point,
            # so reporting the (still unset) field would print 'None'.
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
        return name

    # No dot: either the element's own name or one of its fields.
    if "." not in name:
        if name == element.name:
            # A bare element name means "the primary key", which only a
            # Dimension has.
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
            return None
        # Otherwise it has to be a metadata field or one of the unique keys.
        if name in element.metadata.names or (
            isinstance(element, Dimension) and name in element.uniqueKeys.names
        ):
            return name
        raise ValueError(f"Field '{name}' does not exist in '{element}'.")

    # Qualified name: "<element>.<field>"; only the first dot separates, so
    # "<element>.timespan.begin" yields the field "timespan.begin".
    elem_name, _, field_name = name.partition(".")
    if elem_name != element.name:
        raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'")

    if field_name in timespan_names:
        if not element.temporal:
            raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        return field_name

    is_dimension = isinstance(element, Dimension)
    if is_dimension and field_name == element.primaryKey.name:
        # An explicit primary key is the same as naming the dimension alone.
        return None

    if field_name not in element.metadata.names and not (
        is_dimension and field_name in element.alternateKeys.names
    ):
        raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")
    return field_name