Coverage for python/lsst/daf/butler/queries/_identifiers.py: 13%

80 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-08 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("interpret_identifier", "IdentifierContext") 

31 

32import itertools 

33from collections.abc import Mapping, Set 

34from typing import Any, cast 

35 

36from .._exceptions import InvalidQueryError 

37from ..dimensions import Dimension, DimensionGroup 

38from .tree import ( 

39 DATASET_FIELD_NAMES, 

40 ColumnExpression, 

41 DatasetFieldName, 

42 DatasetFieldReference, 

43 DimensionFieldReference, 

44 DimensionKeyReference, 

45 UnaryExpression, 

46 make_column_literal, 

47) 

48 

49 

class IdentifierContext:  # numpydoc ignore=PR01
    """Context required to resolve an identifier that appears in a query
    expression.
    """

    dimensions: DimensionGroup
    """The dimensions currently participating in the query being filtered.
    Expressions produced from this context may still refer to dimensions
    beyond this set.
    """
    datasets: Set[str]
    """The dataset types currently participating in the query being filtered.
    Expressions produced from this context may still refer to dataset types
    beyond this set.
    """
    bind: Mapping[str, Any]
    """Mapping from lowercased name to literal value; identifiers are checked
    against this mapping before any other interpretation is attempted.
    """

    def __init__(
        self, dimensions: DimensionGroup, datasets: Set[str], bind: Mapping[str, Any] | None = None
    ) -> None:
        self.dimensions = dimensions
        self.datasets = datasets
        if bind is None:
            self.bind = {}
            return
        # Lower-case the bind keys so that identifier lookups can be
        # case-insensitive.
        lowered = {name.lower(): value for name, value in bind.items()}
        if len(lowered.keys()) != len(bind.keys()):
            raise ValueError(f"Duplicate keys present in bind: {bind.keys()}")
        self.bind = lowered

78 

79 

def interpret_identifier(context: IdentifierContext, identifier: str) -> ColumnExpression:
    """Associate an identifier in a ``where`` or ``order_by`` expression with
    a query column or bind literal.

    Parameters
    ----------
    context : `IdentifierContext`
        Information about the query where this identifier is used.
    identifier : `str`
        String identifier to process.

    Returns
    -------
    expression : `ColumnExpression`
        Column expression corresponding to the identifier.

    Raises
    ------
    InvalidQueryError
        Raised if the identifier is ambiguous, references an unrecognized
        field, or cannot be interpreted at all.
    """
    dimensions = context.dimensions
    datasets = context.datasets
    bind = context.bind
    # Make identifiers case-insensitive.  (Bind keys were already lowercased
    # by IdentifierContext.)
    identifier = identifier.lower()

    # Bind literals take precedence over every other interpretation.
    if identifier in bind:
        return make_column_literal(bind[identifier])
    # Dotted identifiers are resolved term-by-term; only 1-3 terms are
    # meaningful, anything else falls through to the final raise.
    terms = identifier.split(".")
    match len(terms):
        case 1:
            # A bare dimension name (e.g. "visit") is a dimension-key
            # reference.
            if identifier in dimensions.universe.dimensions:
                return DimensionKeyReference.model_construct(
                    dimension=dimensions.universe.dimensions[identifier]
                )
            # This is an unqualified reference to a field of a dimension
            # element or datasets; this is okay if it's unambiguous.
            element_matches: set[str] = set()
            for element_name in dimensions.elements:
                element = dimensions.universe[element_name]
                if identifier in element.schema.names:
                    element_matches.add(element_name)
            # A bare dataset field name (e.g. "ingest_date") could apply to
            # any dataset type already in the query.
            if identifier in DATASET_FIELD_NAMES:
                dataset_matches = set(datasets)
            else:
                dataset_matches = set()
            if len(element_matches) + len(dataset_matches) > 1:
                # More than one possible owner: refuse and tell the user the
                # fully-qualified alternatives.
                match_str = ", ".join(
                    f"'{x}.{identifier}'" for x in sorted(itertools.chain(element_matches, dataset_matches))
                )
                raise InvalidQueryError(
                    f"Ambiguous identifier {identifier!r} matches multiple fields: {match_str}."
                )
            elif element_matches:
                element = dimensions.universe[element_matches.pop()]
                return DimensionFieldReference.model_construct(element=element, field=identifier)
            elif dataset_matches:
                return DatasetFieldReference.model_construct(
                    dataset_type=dataset_matches.pop(), field=cast(DatasetFieldName, identifier)
                )
            # No match at all: fall through to the final raise.
        case 2:
            first, second = terms
            if first in dimensions.universe.elements.names:
                # Qualified dimension-element reference, e.g. "visit.<x>".
                element = dimensions.universe[first]
                if second in element.schema.dimensions.names:
                    if isinstance(element, Dimension) and second == element.primary_key.name:
                        # Identifier is something like "visit.id" which we want
                        # to interpret the same way as just "visit".
                        return DimensionKeyReference.model_construct(dimension=element)
                    else:
                        # Identifier is something like "visit.instrument",
                        # which we want to interpret the same way as just
                        # "instrument".
                        dimension = dimensions.universe.dimensions[second]
                        return DimensionKeyReference.model_construct(dimension=dimension)
                elif second in element.schema.remainder.names:
                    # A non-dimension field of the element, e.g.
                    # "visit.timespan".
                    return DimensionFieldReference.model_construct(element=element, field=second)
                else:
                    raise InvalidQueryError(f"Unrecognized field {second!r} for {first}.")
            elif second in DATASET_FIELD_NAMES:
                # We just assume the dataset type is okay; it's the job of
                # higher-level code to complain otherwise.
                return DatasetFieldReference.model_construct(
                    dataset_type=first, field=cast(DatasetFieldName, second)
                )
            # Only reached when `first` is neither a known element nor paired
            # with a dataset field name.
            if first == "timespan":
                # Unqualified "timespan.begin"/"timespan.end": resolve the
                # bare "timespan" field first (the recursive call handles
                # ambiguity), then wrap it in a bound-extraction expression.
                base = interpret_identifier(context, "timespan")
                if second == "begin":
                    return UnaryExpression(operand=base, operator="begin_of")
                if second == "end":
                    return UnaryExpression(operand=base, operator="end_of")
            elif first in datasets:
                # Known dataset type but an invalid field for datasets; give
                # a more specific error than the generic fall-through.
                raise InvalidQueryError(
                    f"Identifier {identifier!r} references dataset type {first!r} but field "
                    f"{second!r} is not valid for datasets."
                )
        case 3:
            # Something like "visit.timespan.begin": resolve the two-term
            # prefix recursively, then apply the bound extraction.
            base = interpret_identifier(context, ".".join(terms[:2]))
            if terms[2] == "begin":
                return UnaryExpression(operand=base, operator="begin_of")
            if terms[2] == "end":
                return UnaryExpression(operand=base, operator="end_of")
    # Anything that did not return above is unrecognized.
    raise InvalidQueryError(f"Unrecognized identifier {identifier!r}.")