Coverage for python / lsst / daf / butler / queries / _identifiers.py: 13%

81 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-18 08:43 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("IdentifierContext", "interpret_identifier") 

31 

32import itertools 

33from collections.abc import Mapping, Set 

34from typing import Any, cast 

35 

36from .._exceptions import InvalidQueryError 

37from ..dimensions import Dimension, DimensionGroup 

38from .tree import ( 

39 DATASET_FIELD_NAMES, 

40 ColumnExpression, 

41 DatasetFieldName, 

42 DatasetFieldReference, 

43 DimensionFieldReference, 

44 DimensionKeyReference, 

45 UnaryExpression, 

46 make_column_literal, 

47) 

48 

49 

class IdentifierContext:  # numpydoc ignore=PR01
    """Context needed to resolve an identifier that appears in a query
    expression.
    """

    dimensions: DimensionGroup
    """Dimensions the query already includes.  Expressions produced from this
    context are permitted to refer to dimensions beyond this set.
    """
    datasets: Set[str]
    """Dataset type names the query already includes.  Expressions produced
    from this context are permitted to refer to dataset types beyond this set.
    """
    bind: Mapping[str, Any]
    """Bind-literal mapping; identifiers are checked against this before any
    other interpretation is attempted.
    """

    def __init__(
        self, dimensions: DimensionGroup, datasets: Set[str], bind: Mapping[str, Any] | None = None
    ) -> None:
        self.dimensions = dimensions
        self.datasets = datasets
        if bind is None:
            self.bind = {}
            return
        self.bind = dict(bind)
        # Converting a multi-valued mapping to a plain dict would silently
        # drop entries; a mismatch in key counts reveals that case.
        if len(self.bind.keys()) != len(bind.keys()):
            raise ValueError(f"Duplicate keys present in bind: {bind.keys()}")

77 

78 

def interpret_identifier(context: IdentifierContext, identifier: str) -> ColumnExpression:
    """Associate an identifier in a ``where`` or ``order_by`` expression with
    a query column or bind literal.

    Parameters
    ----------
    context : `IdentifierContext`
        Information about the query where this identifier is used.
    identifier : `str`
        String identifier to process.

    Returns
    -------
    expression : `ColumnExpression`
        Column expression corresponding to the identifier.

    Raises
    ------
    InvalidQueryError
        Raised if the identifier is ambiguous (an unqualified field name that
        matches more than one dimension element or dataset type), if a
        qualified field name is not valid for its dimension element or
        dataset type, or if the identifier cannot be recognized at all.

    Notes
    -----
    Resolution order: bind literals win outright; after that the identifier
    is split on ``.`` and interpreted according to how many terms it has
    (1, 2, or 3).  Anything else falls through to the final error.
    """
    dimensions = context.dimensions
    datasets = context.datasets
    bind = context.bind
    # Bind literals shadow every other interpretation.
    if identifier in bind:
        return make_column_literal(bind[identifier])
    terms = identifier.split(".")
    match len(terms):
        case 1:
            # A bare dimension name (e.g. "visit") refers to that dimension's
            # primary key.
            if identifier in dimensions.universe.dimensions:
                return DimensionKeyReference.model_construct(
                    dimension=dimensions.universe.dimensions[identifier]
                )
            # This is an unqualified reference to a field of a dimension
            # element or datasets; this is okay if it's unambiguous.
            element_matches: set[str] = set()
            for element_name in dimensions.elements:
                element = dimensions.universe[element_name]
                if identifier in element.schema.names:
                    element_matches.add(element_name)
            # Dataset fields (see DATASET_FIELD_NAMES) exist for every
            # dataset type, so an unqualified dataset field matches all
            # dataset types already in the query.
            if identifier in DATASET_FIELD_NAMES:
                dataset_matches = set(datasets)
            else:
                dataset_matches = set()
            if len(element_matches) + len(dataset_matches) > 1:
                # More than one possible owner: report all of the qualified
                # spellings so the user can disambiguate.
                match_str = ", ".join(
                    f"'{x}.{identifier}'" for x in sorted(itertools.chain(element_matches, dataset_matches))
                )
                raise InvalidQueryError(
                    f"Ambiguous identifier {identifier!r} matches multiple fields: {match_str}."
                )
            elif element_matches:
                element = dimensions.universe[element_matches.pop()]
                # A dimension's primary-key field is canonically represented
                # as a key reference rather than a field reference.
                if isinstance(element, Dimension) and identifier == element.primary_key.name:
                    return DimensionKeyReference(dimension=element)
                else:
                    return DimensionFieldReference.model_construct(element=element, field=identifier)
            elif dataset_matches:
                return DatasetFieldReference.model_construct(
                    dataset_type=dataset_matches.pop(), field=cast(DatasetFieldName, identifier)
                )
            # No match: fall through to the final InvalidQueryError below.
        case 2:
            first, second = terms
            if first in dimensions.universe.elements.names:
                element = dimensions.universe[first]
                if second in element.schema.dimensions.names:
                    if isinstance(element, Dimension) and second == element.primary_key.name:
                        # Identifier is something like "visit.id" which we want
                        # to interpret the same way as just "visit".
                        return DimensionKeyReference.model_construct(dimension=element)
                    else:
                        # Identifier is something like "visit.instrument",
                        # which we want to interpret the same way as just
                        # "instrument".
                        dimension = dimensions.universe.dimensions[second]
                        return DimensionKeyReference.model_construct(dimension=dimension)
                elif second in element.schema.remainder.names:
                    # Non-dimension field of this element (presumably
                    # metadata or other per-element columns; exact contents
                    # come from the element's schema).
                    return DimensionFieldReference.model_construct(element=element, field=second)
                else:
                    raise InvalidQueryError(f"Unrecognized field {second!r} for {first}.")
            elif second in DATASET_FIELD_NAMES:
                # We just assume the dataset type is okay; it's the job of
                # higher-level code to complain otherwise.
                return DatasetFieldReference.model_construct(dataset_type=first, field=second)
            if first == "timespan":
                # "timespan.begin"/"timespan.end": resolve the unqualified
                # "timespan" field via the 1-term logic above, then wrap it
                # in the corresponding endpoint-extraction expression.
                base = interpret_identifier(context, "timespan")
                if second == "begin":
                    return UnaryExpression(operand=base, operator="begin_of")
                if second == "end":
                    return UnaryExpression(operand=base, operator="end_of")
            elif first in datasets:
                # Qualified by a known dataset type but with a field that is
                # not in DATASET_FIELD_NAMES.
                raise InvalidQueryError(
                    f"Identifier {identifier!r} references dataset type {first!r} but field "
                    f"{second!r} is not valid for datasets."
                )
        case 3:
            # e.g. "visit.timespan.begin": resolve the first two terms as a
            # 2-term identifier, then apply the endpoint extraction.
            base = interpret_identifier(context, ".".join(terms[:2]))
            if terms[2] == "begin":
                return UnaryExpression(operand=base, operator="begin_of")
            if terms[2] == "end":
                return UnaryExpression(operand=base, operator="end_of")
    raise InvalidQueryError(f"Unrecognized identifier {identifier!r}.")