Coverage for python/lsst/daf/butler/queries/_expression_strings.py: 26%

122 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-11 03:15 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30from collections.abc import Set 

31from typing import Literal, NamedTuple, TypeAlias 

32 

33import astropy.time 

34 

35from .._exceptions import InvalidQueryError 

36from .._timespan import Timespan 

37from ..column_spec import ColumnType 

38from ..dimensions import DimensionUniverse 

39from ..registry.queries.expressions.categorize import ExpressionConstant, categorizeConstant 

40from ..registry.queries.expressions.parser import Node, RangeLiteral, TreeVisitor, parse_expression 

41from ._identifiers import IdentifierContext, interpret_identifier 

42from .tree import ( 

43 BinaryExpression, 

44 ColumnExpression, 

45 ComparisonOperator, 

46 LiteralValue, 

47 Predicate, 

48 UnaryExpression, 

49 make_column_literal, 

50) 

51 

# Type accepted for one bound value: either a single literal or a collection
# (list, tuple or set) of literals.  The tuple alternative is variadic because
# bound tuples are expanded item by item regardless of their length when an
# identifier is resolved; ``tuple[LiteralValue]`` would describe a 1-tuple
# only.
BindValue = LiteralValue | list[LiteralValue] | tuple[LiteralValue, ...] | Set[LiteralValue]

53 

54 

def convert_expression_string_to_predicate(
    expression: str, *, context: IdentifierContext, universe: DimensionUniverse
) -> Predicate:
    """Translate a Butler query expression string into a `Predicate` that
    can be used in a QueryTree.

    Parameters
    ----------
    expression : `str`
        Filter expression written in the Butler expression-string syntax, as
        used by the old query system.
    context : `IdentifierContext`
        Contextual information used to work out what each identifier in the
        expression refers to.
    universe : `DimensionUniverse`
        Dimension metadata for the Butler database being queried.

    Returns
    -------
    predicate : `Predicate`
        Predicate equivalent to the given filter, for use in `QueryTree`.

    Raises
    ------
    InvalidQueryError
        Raised if the expression string cannot be parsed.
    """
    try:
        parsed = parse_expression(expression)
    except Exception as parse_error:
        raise InvalidQueryError(f"Failed to parse expression '{expression}'") from parse_error

    result = parsed.visit(_ConversionVisitor(context, universe))
    # The visitor's declared return type is a broad union; narrow it here.
    # Anything other than a predicate would point at the grammar, not at the
    # user's input.
    assert isinstance(
        result, Predicate
    ), "The grammar should guarantee that we get a predicate back at the top level."
    return result

89 

90 

class _ColExpr(NamedTuple):
    """Visitor result holding a piece of the original expression that has
    already been converted to a ColumnExpression object.
    """

    # ColumnExpression is a large discriminated union, and mypy struggled with
    # it once it was nested inside the additional union layers of
    # _VisitorResult.  This thin wrapper exists mostly to help with typing and
    # with match().

    value: ColumnExpression

    @property
    def column_type(self) -> ColumnType:
        """Column type reported by the wrapped expression."""
        wrapped = self.value
        return wrapped.column_type

106 

107 

class _Null:
    """Class representing a literal 'null' value in the expression."""

    # Class-level tag shared by all instances.  The visitor's error messages
    # read ``column_type`` from whichever result object they were handed, so
    # a null needs to expose the attribute as well.
    column_type: Literal["null"] = "null"

112 

113 

class _RangeLiteral(NamedTuple):
    """Class representing a range expression."""

    # The parser's range node, passed through unchanged.
    value: RangeLiteral
    # Constant tag identifying this kind of visitor result.
    column_type: Literal["range"] = "range"

119 

120 

class _Sequence(NamedTuple):
    """Class representing a collection of literal values obtained from an
    iterable bind value.
    """

    # Literal column expressions, one per item of the bound iterable.
    value: list[ColumnExpression]
    # Constant tag identifying this kind of visitor result.
    column_type: Literal["sequence"] = "sequence"

124 

125 

# Everything a visitor method may return: a predicate, a wrapped column
# expression, or one of the marker types for null, range and sequence values.
_VisitorResult: TypeAlias = Predicate | _ColExpr | _Null | _RangeLiteral | _Sequence

127 

128 

129class _ConversionVisitor(TreeVisitor[_VisitorResult]): 

130 def __init__(self, context: IdentifierContext, universe: DimensionUniverse): 

131 super().__init__() 

132 self.context = context 

133 self.universe = universe 

134 

135 def visitBinaryOp( 

136 self, operator: str, lhs: _VisitorResult, rhs: _VisitorResult, node: Node 

137 ) -> _VisitorResult: 

138 match (operator, lhs, rhs): 

139 # Handle boolean operators. 

140 case ["OR", Predicate() as lhs, Predicate() as rhs]: 

141 return lhs.logical_or(rhs) 

142 case ["AND", Predicate() as lhs, Predicate() as rhs]: 

143 return lhs.logical_and(rhs) 

144 

145 # Handle comparison operators. 

146 case [("=" | "!=" | "<" | ">" | "<=" | ">="), _ColExpr() as lhs, _ColExpr() as rhs]: 

147 return Predicate.compare( 

148 a=lhs.value, b=rhs.value, operator=_convert_comparison_operator(operator) 

149 ) 

150 

151 # Allow equality comparisons with None/NULL. We don't have an 'IS' 

152 # operator. 

153 case ["=", _ColExpr() as lhs, _Null()]: 

154 return Predicate.is_null(lhs.value) 

155 case ["!=", _ColExpr() as lhs, _Null()]: 

156 return Predicate.is_null(lhs.value).logical_not() 

157 case ["=", _Null(), _ColExpr() as rhs]: 

158 return Predicate.is_null(rhs.value) 

159 case ["!=", _Null(), _ColExpr() as rhs]: 

160 return Predicate.is_null(rhs.value).logical_not() 

161 

162 # Handle arithmetic operations 

163 case [("+" | "-" | "*" | "/" | "%") as op, _ColExpr() as lhs, _ColExpr() as rhs]: 

164 return _ColExpr(BinaryExpression(a=lhs.value, b=rhs.value, operator=op)) 

165 

166 raise InvalidQueryError( 

167 f"Invalid types {lhs.column_type}, {rhs.column_type} for binary operator {operator!r} " 

168 f"in expression {node!s}." 

169 ) 

170 

171 def visitIsIn( 

172 self, lhs: _VisitorResult, values: list[_VisitorResult], not_in: bool, node: Node 

173 ) -> _VisitorResult: 

174 raise NotImplementedError("IN not supported yet") 

175 

176 def visitIdentifier(self, name: str, node: Node) -> _VisitorResult: 

177 name = name.lower() 

178 

179 if name in self.context.bind: 

180 value = self.context.bind[name] 

181 # Lists of values do not have a direct representation in the new 

182 # query system, so we have to handle them separately here. 

183 if isinstance(value, list | tuple | Set): 

184 literals: list[ColumnExpression] = [make_column_literal(item) for item in value] 

185 types = set({item.column_type for item in literals}) 

186 if len(types) > 1: 

187 raise InvalidQueryError( 

188 f"Mismatched types in bind iterable: {value} has a mix of {types}." 

189 ) 

190 return _Sequence(literals) 

191 

192 # The other constants are handled in interpret_identifier(). 

193 if categorizeConstant(name) == ExpressionConstant.NULL: 

194 return _Null() 

195 

196 return _ColExpr(interpret_identifier(self.context, name)) 

197 

198 def visitNumericLiteral(self, value: str, node: Node) -> _VisitorResult: 

199 numeric: int | float 

200 try: 

201 numeric = int(value) 

202 except ValueError: 

203 # int() raises for float-like strings 

204 numeric = float(value) 

205 return _make_literal(numeric) 

206 

207 def visitParens(self, expression: _VisitorResult, node: Node) -> _VisitorResult: 

208 return expression 

209 

210 def visitPointNode(self, ra: _VisitorResult, dec: _VisitorResult, node: Node) -> _VisitorResult: 

211 raise NotImplementedError("POINT() function is not supported yet") 

212 

213 def visitRangeLiteral( 

214 self, start: int, stop: int, stride: int | None, node: RangeLiteral 

215 ) -> _VisitorResult: 

216 # Consumed by visitIsIn. 

217 return _RangeLiteral(node) 

218 

219 def visitStringLiteral(self, value: str, node: Node) -> _VisitorResult: 

220 return _make_literal(value) 

221 

222 def visitTimeLiteral(self, value: astropy.time.Time, node: Node) -> _VisitorResult: 

223 return _make_literal(value) 

224 

225 def visitTupleNode(self, items: tuple[_VisitorResult, ...], node: Node) -> _VisitorResult: 

226 if len(items) != 2: 

227 raise InvalidQueryError(f"Timespan tuple should have exactly two items (begin, end) in '{node}'") 

228 

229 begin = _to_timespan_bound(items[0], node) 

230 end = _to_timespan_bound(items[1], node) 

231 return _make_literal(Timespan(begin, end)) 

232 

233 def visitUnaryOp(self, operator: str, operand: _VisitorResult, node: Node) -> _VisitorResult: 

234 # Docstring inherited. 

235 match (operator, operand): 

236 case ["NOT", Predicate() as operand]: 

237 return operand.logical_not() 

238 case ["+", _ColExpr(column_type="int" | "float") as operand]: 

239 # + is a no-op. 

240 return operand 

241 case ["-", _ColExpr(column_type="int" | "float", value=expr)]: 

242 return _ColExpr(UnaryExpression(operand=expr, operator="-")) 

243 raise InvalidQueryError( 

244 f"Unary operator {operator!r} is not valid for operand of type {operand.column_type} in {node!s}." 

245 ) 

246 

247 

def _make_literal(value: LiteralValue) -> _ColExpr:
    """Turn a plain literal value into a wrapped literal column expression."""
    literal = make_column_literal(value)
    return _ColExpr(literal)

250 

251 

def _to_timespan_bound(value: _VisitorResult, node: Node) -> astropy.time.Time | None:
    """Extract one bound of a timespan from a visitor result.

    Parameters
    ----------
    value : `_VisitorResult`
        Result produced for one item of a timespan tuple.
    node : `Node`
        Tree node of the tuple, used only in the error message.

    Returns
    -------
    bound : `astropy.time.Time` or `None`
        The value of a datetime expression, or `None` for a null.

    Raises
    ------
    InvalidQueryError
        Raised if the item is neither a datetime expression nor a null.
    """
    if isinstance(value, _ColExpr):
        inner = value.value
        if inner.expression_type == "datetime":
            return inner.value
    if isinstance(value, _Null):
        return None

    raise InvalidQueryError(
        f'Invalid type in timespan tuple "{node}" '
        '(Note that date/time strings must be preceded by "T" to be recognized).'
    )

263 

264 

def _convert_comparison_operator(value: str) -> ComparisonOperator:
    """Map a comparison operator as spelled in an expression string to the
    spelling used by QueryTree.
    """
    # Only "=" and "OVERLAPS" are spelled differently; the remaining
    # operators map to themselves.
    translations: dict[str, ComparisonOperator] = {
        "=": "==",
        "OVERLAPS": "overlaps",
        "!=": "!=",
        "<": "<",
        ">": ">",
        "<=": "<=",
        ">=": ">=",
    }
    converted = translations.get(value)
    if converted is None:
        raise AssertionError(f"Unhandled comparison operator {value}")
    return converted