Coverage for python / lsst / daf / butler / queries / tree / _column_expression.py: 39%

136 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-18 08:43 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

# Names re-exported by ``from ._column_expression import *``.
__all__ = (
    "BinaryExpression",
    "BinaryOperator",
    "ColumnExpression",
    "OrderExpression",
    "Reversed",
    "UnaryExpression",
    "UnaryOperator",
    "is_one_datetime_and_one_ingest_date",
    "is_one_timespan_and_one_datetime",
    "is_one_timespan_and_one_ingest_date",
    "validate_order_expression",
)

43 

44from typing import TYPE_CHECKING, Annotated, Literal, NamedTuple, TypeAlias, TypeVar, final 

45 

46import pydantic 

47 

48from ..._exceptions import InvalidQueryError 

49from ...column_spec import ColumnType 

50from ._base import ColumnExpressionBase 

51from ._column_literal import ColumnLiteral 

52from ._column_reference import ColumnReference 

53from ._column_set import ColumnSet 

54 

55if TYPE_CHECKING: 

56 from ..visitors import ColumnExpressionVisitor 

57 

58 

59_T = TypeVar("_T") 

60 

61 

62UnaryOperator: TypeAlias = Literal["-", "begin_of", "end_of"] 

63BinaryOperator: TypeAlias = Literal["+", "-", "*", "/", "%"] 

64 

65 

66@final 

67class UnaryExpression(ColumnExpressionBase): 

68 """A unary operation on a column expression that returns a non-boolean 

69 value. 

70 """ 

71 

72 expression_type: Literal["unary"] = "unary" 

73 

74 operand: ColumnExpression 

75 """Expression this one operates on.""" 

76 

77 operator: UnaryOperator 

78 """Operator this expression applies.""" 

79 

80 def gather_required_columns(self, columns: ColumnSet) -> None: 

81 # Docstring inherited. 

82 self.operand.gather_required_columns(columns) 

83 

84 def gather_governors(self, governors: set[str]) -> None: 

85 # Docstring inherited. 

86 self.operand.gather_governors(governors) 

87 

88 @property 

89 def column_type(self) -> ColumnType: 

90 # Docstring inherited. 

91 match self.operator: 

92 case "-": 

93 return self.operand.column_type 

94 case "begin_of" | "end_of": 

95 return "datetime" 

96 raise AssertionError(f"Invalid unary expression operator {self.operator}.") 

97 

98 def __str__(self) -> str: 

99 s = str(self.operand) 

100 if not (self.operand.is_literal or self.operand.is_column_reference): 

101 s = f"({s})" 

102 match self.operator: 

103 case "-": 

104 return f"-{s}" 

105 case "begin_of": 

106 return f"{s}.begin" 

107 case "end_of": 

108 return f"{s}.end" 

109 

110 @pydantic.model_validator(mode="after") 

111 def _validate_types(self) -> UnaryExpression: 

112 match (self.operator, self.operand.column_type): 

113 case ("-", "int" | "float"): 

114 pass 

115 case ("begin_of" | "end_of", "timespan"): 

116 pass 

117 case _: 

118 raise InvalidQueryError( 

119 f"Invalid column type {self.operand.column_type} for operator {self.operator!r}." 

120 ) 

121 return self 

122 

123 def visit(self, visitor: ColumnExpressionVisitor[_T]) -> _T: 

124 # Docstring inherited. 

125 return visitor.visit_unary_expression(self) 

126 

127 

128@final 

129class BinaryExpression(ColumnExpressionBase): 

130 """A binary operation on column expressions that returns a non-boolean 

131 value. 

132 """ 

133 

134 expression_type: Literal["binary"] = "binary" 

135 

136 a: ColumnExpression 

137 """Left-hand side expression this one operates on.""" 

138 

139 b: ColumnExpression 

140 """Right-hand side expression this one operates on.""" 

141 

142 operator: BinaryOperator 

143 """Operator this expression applies. 

144 

145 Integer '/' and '%' are defined as in SQL, not Python (though the 

146 definitions are the same for positive arguments). 

147 """ 

148 

149 def gather_required_columns(self, columns: ColumnSet) -> None: 

150 # Docstring inherited. 

151 self.a.gather_required_columns(columns) 

152 self.b.gather_required_columns(columns) 

153 

154 def gather_governors(self, governors: set[str]) -> None: 

155 # Docstring inherited. 

156 self.a.gather_governors(governors) 

157 self.b.gather_governors(governors) 

158 

159 @property 

160 def column_type(self) -> ColumnType: 

161 # Docstring inherited. 

162 return self.a.column_type 

163 

164 def __str__(self) -> str: 

165 a = str(self.a) 

166 b = str(self.b) 

167 if not (self.a.is_literal or self.a.is_column_reference): 

168 a = f"({a})" 

169 if not (self.b.is_literal or self.b.is_column_reference): 

170 b = f"({b})" 

171 return f"{a} {self.operator} {b}" 

172 

173 @pydantic.model_validator(mode="after") 

174 def _validate_types(self) -> BinaryExpression: 

175 if self.a.column_type != self.b.column_type: 

176 raise InvalidQueryError( 

177 f"Column types for operator {self.operator} do not agree " 

178 f"({self.a.column_type}, {self.b.column_type})." 

179 ) 

180 match (self.operator, self.a.column_type): 

181 case ("+" | "-" | "*" | "/", "int" | "float"): 

182 pass 

183 case ("%", "int"): 

184 pass 

185 case _: 

186 raise InvalidQueryError( 

187 f"Invalid column type {self.a.column_type} for operator {self.operator!r}." 

188 ) 

189 return self 

190 

191 def visit(self, visitor: ColumnExpressionVisitor[_T]) -> _T: 

192 # Docstring inherited. 

193 return visitor.visit_binary_expression(self) 

194 

195 

# Union without Pydantic annotation for the discriminator, for use in nesting
# in other unions that will add that annotation.  It's not clear whether it
# would work to just nest the annotated ones, but it seems safest not to rely
# on undocumented behavior.
_ColumnExpression: TypeAlias = ColumnLiteral | ColumnReference | UnaryExpression | BinaryExpression


# The same union, tagged so Pydantic selects the member class from each
# member's ``expression_type`` field.
ColumnExpression: TypeAlias = Annotated[_ColumnExpression, pydantic.Field(discriminator="expression_type")]

204 

205 

@final
class Reversed(ColumnExpressionBase):
    """A tag wrapper for `ColumnExpression` that indicates sorting in
    reverse order.
    """

    expression_type: Literal["reversed"] = "reversed"

    operand: ColumnExpression
    """Expression to sort on in reverse."""

    def gather_required_columns(self, columns: ColumnSet) -> None:
        # Docstring inherited.
        self.operand.gather_required_columns(columns)

    def gather_governors(self, governors: set[str]) -> None:
        # Docstring inherited.
        self.operand.gather_governors(governors)

    @property
    def column_type(self) -> ColumnType:
        # Docstring inherited.
        return self.operand.column_type

    def __str__(self) -> str:
        return f"{self.operand} DESC"

    def visit(self, visitor: ColumnExpressionVisitor[_T]) -> _T:
        # Docstring inherited.
        return visitor.visit_reversed(self)

235 

236 

def validate_order_expression(expression: _ColumnExpression | Reversed) -> _ColumnExpression | Reversed:
    """Check that a column expression can be used for sorting.

    Parameters
    ----------
    expression : `OrderExpression`
        Expression to check.

    Returns
    -------
    expression : `OrderExpression`
        The checked expression; returned to make this usable as a Pydantic
        validator.

    Raises
    ------
    InvalidQueryError
        Raised if this expression is not one that can be used for sorting.
    """
    # Only these column types are accepted as sort keys.
    if expression.column_type not in ("int", "string", "float", "datetime", "ingest_date"):
        raise InvalidQueryError(f"Column type {expression.column_type} of {expression} is not ordered.")
    return expression

259 

260 

# Any column expression, optionally wrapped in `Reversed`, that passes
# `validate_order_expression`.
OrderExpression: TypeAlias = Annotated[
    _ColumnExpression | Reversed,
    pydantic.Field(discriminator="expression_type"),
    pydantic.AfterValidator(validate_order_expression),
]

266 

267 

class TimespanAndDatetime(NamedTuple):
    """A pair of column expressions sorted into their timespan and
    point-in-time roles.
    """

    timespan: ColumnExpression
    """The expression whose column type is ``timespan``."""

    datetime: ColumnExpression
    """The other expression of the pair: a ``datetime`` column when built by
    `is_one_timespan_and_one_datetime`, or an ``ingest_date`` column when
    built by `is_one_timespan_and_one_ingest_date`.
    """

271 

272 

def is_one_timespan_and_one_datetime(
    a: ColumnExpression, b: ColumnExpression
) -> TimespanAndDatetime | None:
    """Check whether the two columns ``a`` and ``b`` include one datetime
    column and one timespan column.

    Parameters
    ----------
    a : `ColumnExpression`
        First column expression.
    b : `ColumnExpression`
        Second column expression.

    Returns
    -------
    which_is_which : `TimespanAndDatetime` | None
        An object telling which column is the datetime and which is the
        timespan, or `None` if the types were not as expected.
    """
    if a.column_type == "timespan" and b.column_type == "datetime":
        return TimespanAndDatetime(a, b)
    elif a.column_type == "datetime" and b.column_type == "timespan":
        # Arguments came in the opposite order; swap them into field order.
        return TimespanAndDatetime(b, a)
    else:
        return None

291 

292 

def is_one_datetime_and_one_ingest_date(
    a: ColumnExpression, b: ColumnExpression
) -> bool:
    """Return `True` if the two columns ``a`` and ``b`` include one datetime
    column and one ingest_date column.

    Parameters
    ----------
    a : `ColumnExpression`
        First column expression.
    b : `ColumnExpression`
        Second column expression.

    Returns
    -------
    result : `bool`
        `True` if one of the expressions has column type ``datetime`` and
        the other has column type ``ingest_date``, in either order.
    """
    # Comparing as sets accepts either argument order and rejects a pair
    # whose two types are the same.
    return {a.column_type, b.column_type} == {"datetime", "ingest_date"}

302 

303 

def is_one_timespan_and_one_ingest_date(
    a: ColumnExpression, b: ColumnExpression
) -> TimespanAndDatetime | None:
    """Check whether the two columns ``a`` and ``b`` include one timespan
    column and one ingest_date column.

    Parameters
    ----------
    a : `ColumnExpression`
        First column expression.
    b : `ColumnExpression`
        Second column expression.

    Returns
    -------
    which_is_which : `TimespanAndDatetime` | None
        An object telling which column is the ingest_date and which is the
        timespan, or `None` if the types were not as expected.  The
        ingest_date expression is stored in the ``datetime`` field.
    """
    if a.column_type == "timespan" and b.column_type == "ingest_date":
        return TimespanAndDatetime(a, b)
    elif a.column_type == "ingest_date" and b.column_type == "timespan":
        # Arguments came in the opposite order; swap them into field order.
        return TimespanAndDatetime(b, a)
    else:
        return None

329 

330 

def is_numeric(expr: ColumnExpression) -> bool:
    """Return `True` if the expression is a numeric type (float or int).

    Parameters
    ----------
    expr : `ColumnExpression`
        Column expression to test.

    Returns
    -------
    result : `bool`
        `True` if the expression's column type is ``float`` or ``int``.
    """
    return expr.column_type in ("float", "int")