Coverage for python/lsst/daf/butler/registry/queries/expressions/categorize.py: 6%
105 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-02 03:16 -0700
« prev ^ index » next coverage.py v7.5.0, created at 2024-05-02 03:16 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29__all__ = () # all symbols intentionally private; for internal package use.
31import enum
32from typing import cast
34from ....dimensions import Dimension, DimensionElement, DimensionGroup, DimensionUniverse
class ExpressionConstant(enum.Enum):
    """Global constants that may appear in any parsed expression.

    Each member's value is the lower-case spelling of the identifier, so a
    member can be looked up directly from a lower-cased identifier string.
    """

    # The ``null`` identifier.
    NULL = "null"

    # The ``ingest_date`` identifier.
    INGEST_DATE = "ingest_date"
def categorizeConstant(name: str) -> ExpressionConstant | None:
    """Map an expression identifier onto one of the global constants.

    Parameters
    ----------
    name : `str`
        Identifier to look up. The comparison ignores case: the identifier
        is lower-cased before it is compared with the constant values.

    Returns
    -------
    categorized : `ExpressionConstant` or `None`
        The matching enumeration member, or `None` when the identifier is
        not one of the known constants.
    """
    lowered = name.lower()
    # Member values are unique, so at most one member can match.
    for constant in ExpressionConstant:
        if constant.value == lowered:
            return constant
    return None
def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an expression identifier to a dimension element and column.

    The identifier is split at its first dot. Without a dot (or with nothing
    after it) the whole identifier is looked up in ``universe``; otherwise
    the part before the dot is looked up and the remainder is interpreted as
    a column of that element.

    Parameters
    ----------
    universe : `DimensionUniverse`
        All known dimensions; indexed by element name.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if the
        identifier refers to a dimension itself (either directly, via its
        primary key column, or via the name of one of the element's
        dimensions).

    Raises
    ------
    LookupError
        Raised if the element part of the identifier is not found in
        ``universe``.
    """
    prefix, _, suffix = name.partition(".")

    if not suffix:
        # Unqualified identifier: it must itself be a key of the universe.
        try:
            return universe[prefix], None
        except KeyError as err:
            raise LookupError(f"No dimension with name '{prefix}'.") from err

    try:
        owner = universe[prefix]
    except KeyError:
        if prefix in ("timespan", "datetime", "timestamp"):
            # These prefixes get a more helpful message that suggests a
            # fully-qualified form.
            raise LookupError(
                "Dimension element name cannot be inferred in this context; "
                f"use <dimension>.timespan.{suffix} instead."
            ) from None
        raise LookupError(f"No dimension element with name {prefix!r} in {name!r}.") from None

    if isinstance(owner, Dimension) and suffix == owner.primaryKey.name:
        # "visit.id = x" is accepted as a clearer spelling of "visit = x".
        return owner, None
    if suffix in owner.dimensions.names:
        # "patch.tract = x" or "tract.tract = x" really means "tract = x"
        # (or "tract.id = x"), so hand back the referenced dimension.
        return owner.dimensions[suffix], None
    return owner, suffix
def categorizeOrderByName(dimensions: DimensionGroup, name: str) -> tuple[DimensionElement, str | None]:
    """Resolve an ORDER BY identifier against a group of dimensions.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions whose elements are searched for the identifier.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if the
        identifier refers to the element itself or to the primary key of a
        `Dimension`.

    Raises
    ------
    ValueError
        Raised if the element name is not one of the elements of
        ``dimensions``, if the field name is not recognized, or if an
        unqualified field name matches more than one element.

    Notes
    -----
    The rules for ORDER BY identifiers differ slightly from those in
    `categorizeElementId`:

    - The name can be a dimension element name, e.g. ``visit``.
    - The name can be an element name and a metadata name (or key name)
      separated by a dot, e.g. ``detector.full_name``.
    - The name can be a metadata name (or key name) without an element
      prefix, e.g. ``day_obs``; it is then searched for in all elements of
      ``dimensions``, and an exception is raised if it appears in more than
      one of them.
    - The special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements; without an element prefix the single
      temporal element of ``dimensions`` is used.
    """
    if name in ("timespan.begin", "timespan.end"):
        # Bare timespan bound: only unambiguous when exactly one element of
        # the group is temporal.
        temporal_elements = []
        for candidate_name in dimensions.elements:
            candidate = dimensions.universe[candidate_name]
            if candidate.temporal:
                temporal_elements.append(candidate)
        if not temporal_elements:
            raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
        if len(temporal_elements) > 1:
            listed = ", ".join(found.name for found in temporal_elements)
            raise ValueError(
                "Timespan exists in more than one dimension element "
                f"({listed}); "
                "qualify timespan with specific dimension name."
            )
        return temporal_elements[0], name

    if "." not in name:
        # Unqualified: an element name wins over any field of the same name.
        if name in dimensions.elements:
            return dimensions.universe[name], None

        # Otherwise collect every element carrying this name as metadata,
        # then every dimension carrying it as a unique key. The flag
        # records which of the two searches produced the match.
        candidates: list[tuple[DimensionElement, bool]] = []
        for candidate_name in dimensions.elements:
            candidate = dimensions.universe[candidate_name]
            if name in candidate.metadata.names:
                candidates.append((candidate, False))
        for dimension_name in dimensions.names:
            dimension = dimensions.universe.dimensions[dimension_name]
            if name in dimension.uniqueKeys.names:
                candidates.append((dimension, True))

        if not candidates:
            raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
        if len(candidates) > 1:
            listed = ", ".join(found.name for found, _ in candidates)
            raise ValueError(
                f"Metadata '{name}' exists in more than one dimension element "
                f"({listed}); "
                "qualify field name with dimension name."
            )

        match, matched_as_key = candidates[0]
        if matched_as_key and name == cast(Dimension, match).primaryKey.name:
            # A reference to the primary key field is the same as a
            # reference to the dimension itself.
            return match, None
        return match, name

    # Qualified name: "<element>.<field>", split at the first dot.
    owner_name, _, column = name.partition(".")
    if owner_name not in dimensions.elements:
        if column in ("begin", "end"):
            raise ValueError(
                f"Unknown dimension element {owner_name!r}; perhaps you meant 'timespan.{column}'?"
            )
        raise ValueError(f"Unknown dimension element {owner_name!r}.")

    owner = dimensions.universe[owner_name]
    if column in ("timespan.begin", "timespan.end"):
        if not owner.temporal:
            raise ValueError(f"Cannot use '{column}' with non-temporal element '{owner}'.")
        return owner, column
    if isinstance(owner, Dimension) and column == owner.primaryKey.name:
        # Spelling out the primary key is optional.
        return owner, None
    if column in owner.dimensions.names:
        # Something like visit.physical_filter, which we want to remap to
        # just "physical_filter".
        return dimensions.universe[column], None

    is_known_field = column in owner.metadata.names or (
        isinstance(owner, Dimension) and column in owner.alternateKeys.names
    )
    if not is_known_field:
        raise ValueError(f"Field '{column}' does not exist in '{owner}'.")
    return owner, column
def categorizeElementOrderByName(element: DimensionElement, name: str) -> tuple[DimensionElement, str | None]:
    """Categorize an identifier in an ORDER BY clause for a single element.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element.
    name : `str`
        Identifier to categorize.

    Returns
    -------
    element : `DimensionElement`
        The `DimensionElement` the identifier refers to.
    column : `str` or `None`
        The name of a column in the table for ``element``, or `None` if the
        identifier refers to a `Dimension` itself or to its primary key.

    Raises
    ------
    ValueError
        Raised if name is not recognized, if a qualified name uses an
        element name other than that of ``element``, or if a timespan
        identifier is used with a non-temporal element.

    Notes
    -----
    For ORDER BY identifiers we use a slightly different set of rules
    compared to the rules in `categorizeElementId`:

    - Name can be a dimension element name, e.g. ``visit``.
    - Name can be an element name and a metadata name (or key name) separated
      by dot, e.g. ``detector.full_name``; the element name must correspond
      to the ``element`` argument.
    - Name can be a metadata name without element name prefix, e.g.
      ``day_obs``.
    - Two special identifiers ``timespan.begin`` and ``timespan.end`` can be
      used with temporal elements.
    """
    field_name: str | None = None
    if name in ("timespan.begin", "timespan.end"):
        if element.temporal:
            field_name = name
        else:
            # Report the identifier the caller passed: ``field_name`` is
            # still `None` here, so formatting it would print 'None'.
            raise ValueError(f"Cannot use '{name}' with non-temporal element '{element}'.")
    elif "." not in name:
        # No dot, can be the element's own name, one of its dimensions, or
        # one of its fields.
        if name == element.name:
            # Must be a dimension element
            if not isinstance(element, Dimension):
                raise ValueError(f"Element '{element}' is not a dimension.")
        elif name in element.dimensions.names:
            # The name of one of the element's dimensions (other than the
            # element itself, which is handled above); return that
            # dimension instead.
            return element.universe[name], None
        else:
            # Can be a metadata name or any of the keys, but primary key needs
            # to be treated the same as a reference to the dimension name
            # itself.
            if isinstance(element, Dimension):
                if name == element.primaryKey.name:
                    return element, None
                elif name in element.uniqueKeys.names:
                    return element, name
            if name in element.metadata.names:
                return element, name
            raise ValueError(f"Field '{name}' does not exist in '{element}'.")
    else:
        # qualified name, must be a dimension element and a field
        elem_name, _, field_name = name.partition(".")
        if elem_name != element.name:
            if field_name == "begin" or field_name == "end":
                extra = f"; perhaps you meant 'timespan.{field_name}'?"
            else:
                extra = "."
            raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")
        if field_name in ("timespan.begin", "timespan.end"):
            if not element.temporal:
                raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
        elif isinstance(element, Dimension) and field_name == element.primaryKey.name:
            # Primary key is optional
            field_name = None
        else:
            if not (
                field_name in element.metadata.names
                or (isinstance(element, Dimension) and field_name in element.alternateKeys.names)
            ):
                raise ValueError(f"Field '{field_name}' does not exist in '{element}'.")

    return element, field_name