Coverage for python / lsst / daf / butler / queries / _identifiers.py: 13%
81 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30__all__ = ("IdentifierContext", "interpret_identifier")
32import itertools
33from collections.abc import Mapping, Set
34from typing import Any, cast
36from .._exceptions import InvalidQueryError
37from ..dimensions import Dimension, DimensionGroup
38from .tree import (
39 DATASET_FIELD_NAMES,
40 ColumnExpression,
41 DatasetFieldName,
42 DatasetFieldReference,
43 DimensionFieldReference,
44 DimensionKeyReference,
45 UnaryExpression,
46 make_column_literal,
47)
class IdentifierContext:  # numpydoc ignore=PR01
    """Contextual information that helps determine the meaning of an identifier
    used in a query.
    """

    dimensions: DimensionGroup
    """Dimensions already present in the query this filter is being applied
    to. Returned expressions may reference dimensions outside this set.
    """

    datasets: Set[str]
    """Dataset types already present in the query this filter is being applied
    to. Returned expressions may reference datasets outside this set.
    """

    bind: Mapping[str, Any]
    """Dictionary of bind literals to match identifiers against first."""

    def __init__(
        self, dimensions: DimensionGroup, datasets: Set[str], bind: Mapping[str, Any] | None = None
    ) -> None:
        self.dimensions = dimensions
        self.datasets = datasets
        if bind is None:
            # No binds supplied: use an empty mapping.
            self.bind = {}
            return
        # Copy the caller's mapping so later mutations cannot affect us.
        self.bind = dict(bind)
        # Guard against inputs whose key view reports more entries than the
        # copied dict retained (i.e. duplicate keys collapsed by dict()).
        if len(self.bind.keys()) != len(bind.keys()):
            raise ValueError(f"Duplicate keys present in bind: {bind.keys()}")
def interpret_identifier(context: IdentifierContext, identifier: str) -> ColumnExpression:
    """Associate an identifier in a ``where`` or ``order_by`` expression with
    a query column or bind literal.

    Parameters
    ----------
    context : `IdentifierContext`
        Information about the query where this identifier is used.
    identifier : `str`
        String identifier to process.

    Returns
    -------
    expression : `ColumnExpression`
        Column expression corresponding to the identifier.

    Raises
    ------
    InvalidQueryError
        Raised if the identifier is ambiguous, names an unrecognized field,
        or cannot be interpreted at all.
    """
    dimensions = context.dimensions
    datasets = context.datasets
    bind = context.bind
    # Bind literals always take precedence over any column interpretation of
    # the same name.
    if identifier in bind:
        return make_column_literal(bind[identifier])
    # Resolution strategy depends on how many dot-separated terms there are.
    terms = identifier.split(".")
    match len(terms):
        case 1:
            # A bare dimension name refers to that dimension's key column.
            if identifier in dimensions.universe.dimensions:
                return DimensionKeyReference.model_construct(
                    dimension=dimensions.universe.dimensions[identifier]
                )
            # This is an unqualified reference to a field of a dimension
            # element or datasets; this is okay if it's unambiguous.
            element_matches: set[str] = set()
            for element_name in dimensions.elements:
                element = dimensions.universe[element_name]
                if identifier in element.schema.names:
                    element_matches.add(element_name)
            # A bare dataset-field name could belong to every dataset type
            # already present in the query.
            if identifier in DATASET_FIELD_NAMES:
                dataset_matches = set(datasets)
            else:
                dataset_matches = set()
            if len(element_matches) + len(dataset_matches) > 1:
                # Multiple candidate owners: list the fully-qualified forms
                # so the user can disambiguate.
                match_str = ", ".join(
                    f"'{x}.{identifier}'" for x in sorted(itertools.chain(element_matches, dataset_matches))
                )
                raise InvalidQueryError(
                    f"Ambiguous identifier {identifier!r} matches multiple fields: {match_str}."
                )
            elif element_matches:
                element = dimensions.universe[element_matches.pop()]
                # If the matched field is the element's own primary key,
                # normalize to a dimension-key reference instead of a field
                # reference.
                if isinstance(element, Dimension) and identifier == element.primary_key.name:
                    return DimensionKeyReference(dimension=element)
                else:
                    return DimensionFieldReference.model_construct(element=element, field=identifier)
            elif dataset_matches:
                # Exactly one dataset type matched.
                return DatasetFieldReference.model_construct(
                    dataset_type=dataset_matches.pop(), field=cast(DatasetFieldName, identifier)
                )
        case 2:
            first, second = terms
            if first in dimensions.universe.elements.names:
                element = dimensions.universe[first]
                if second in element.schema.dimensions.names:
                    if isinstance(element, Dimension) and second == element.primary_key.name:
                        # Identifier is something like "visit.id" which we want
                        # to interpret the same way as just "visit".
                        return DimensionKeyReference.model_construct(dimension=element)
                    else:
                        # Identifier is something like "visit.instrument",
                        # which we want to interpret the same way as just
                        # "instrument".
                        dimension = dimensions.universe.dimensions[second]
                        return DimensionKeyReference.model_construct(dimension=dimension)
                elif second in element.schema.remainder.names:
                    # A non-dimension (metadata) field of the element.
                    return DimensionFieldReference.model_construct(element=element, field=second)
                else:
                    raise InvalidQueryError(f"Unrecognized field {second!r} for {first}.")
            elif second in DATASET_FIELD_NAMES:
                # We just assume the dataset type is okay; it's the job of
                # higher-level code to complain otherwise.
                return DatasetFieldReference.model_construct(
                    dataset_type=first, field=cast(DatasetFieldName, second)
                )
            if first == "timespan":
                # "timespan.begin"/"timespan.end": resolve bare "timespan"
                # first, then wrap it in a bound-extraction expression.
                base = interpret_identifier(context, "timespan")
                if second == "begin":
                    return UnaryExpression(operand=base, operator="begin_of")
                if second == "end":
                    return UnaryExpression(operand=base, operator="end_of")
            elif first in datasets:
                raise InvalidQueryError(
                    f"Identifier {identifier!r} references dataset type {first!r} but field "
                    f"{second!r} is not valid for datasets."
                )
        case 3:
            # Three terms: the first two resolve to a timespan-valued column;
            # the third selects one of its bounds.
            base = interpret_identifier(context, ".".join(terms[:2]))
            if terms[2] == "begin":
                return UnaryExpression(operand=base, operator="begin_of")
            if terms[2] == "end":
                return UnaryExpression(operand=base, operator="end_of")
    # Anything that did not return above is not a recognized column.
    raise InvalidQueryError(f"Unrecognized identifier {identifier!r}.")