Coverage for python / lsst / daf / butler / queries / expressions / parser / parserYacc.py: 33%

199 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-24 08:17 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28"""Syntax definition for user expression parser.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ["ParseError", "ParserEOFError", "ParserYacc", "ParserYaccError", "YaccProduction"] 

33 

34import functools 

35import re 

36import warnings 

37from typing import Any, Protocol 

38 

39import astropy.time 

40 

41# As of astropy 4.2, the erfa interface is shipped independently and 

42# ErfaWarning is no longer an AstropyWarning 

43try: 

44 import erfa 

45except ImportError: 

46 erfa = None 

47 

48from .exprTree import ( 

49 BinaryOp, 

50 BindName, 

51 Identifier, 

52 IsIn, 

53 Node, 

54 NumericLiteral, 

55 Parens, 

56 RangeLiteral, 

57 StringLiteral, 

58 TimeLiteral, 

59 TupleNode, 

60 UnaryOp, 

61 function_call, 

62) 

63from .parserLex import LexToken, ParserLex, ParserLexError 

64from .ply import yacc 

65 

66 

class YaccProduction(Protocol):
    """Structural type describing the production object that ``ply.yacc``
    hands to grammar rule functions.

    Only the members used by the grammar rules in this module are declared.
    """

    # Lexer instance that is producing tokens for the current parse.
    lexer: Any
    # Value attached to the production.
    value: Any

    def __getitem__(self, n: int) -> Any:
        """Return the value of the ``n``-th symbol of the production."""
        ...

    def __setitem__(self, n: int, v: Any) -> None:
        """Set the value of the ``n``-th symbol of the production."""
        ...

    def __len__(self) -> int:
        """Return the number of symbols in the production."""
        ...

    def lineno(self, n: int) -> int:
        """Return the line number recorded for the ``n``-th symbol."""
        ...

    def lexpos(self, n: int) -> int:
        """Return the lexing position recorded for the ``n``-th symbol."""
        ...

78 

79 

# Regular expression that splits a time literal into an optional "format/"
# prefix, the time value itself, and an optional "/scale" suffix. Each value
# alternative has its own named group (number, iso, fits, yday) so that the
# time format can be guessed from whichever alternative matched when the
# format is not explicitly provided in the string itself. Matching is
# case-insensitive and the pattern is anchored at the end of the string.
# NOTE(review): the "number" alternative requires a decimal point, so a bare
# integer such as "51544" matches none of the alternatives -- confirm that
# this is intended.
_re_time_str = re.compile(
    r"""
    ((?P<format>\w+)/)? # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+))) # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t) [no timezone]
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?) # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?) # yday
    )
    (/(?P<scale>\w+))? # optionally followed by "/scale"
    $
""",
    re.VERBOSE | re.IGNORECASE,
)

99 

100 

def _parseTimeString(time_str: str) -> astropy.time.Time:
    """Convert a time literal string into an `astropy.time.Time`.

    The string has the form ``[format/]value[/scale]``. When the format or
    the scale is omitted it is guessed from the shape of the value.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        The parsed time.

    Raises
    ------
    ValueError
        Raised if the string does not match any known layout, if it names an
        unknown format or scale, or if astropy rejects the value for the
        selected format.
    """
    # A stringified timezone-aware datetime carries a "+00:00" suffix for
    # UTC; drop it. Other timezone offsets are left in place and will fail
    # to match the regular expression below.
    time_str = time_str.replace("+00:00", "")

    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')

    value, fmt, scale = parsed.group("value", "format", "scale")
    is_number = parsed.group("number") is not None

    # Validate explicitly given format and scale (case-insensitively).
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    # Numeric values are handed to astropy as floats, not as strings.
    if is_number:
        value = float(value)

    # Without an explicit format, pick one based on which alternative of
    # the regular expression matched the value.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif parsed.group("iso") is not None:
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        elif parsed.group("fits") is not None:
            fmt = "fits"
        elif parsed.group("yday") is not None:
            fmt = "yday"
    assert fmt is not None

    # Without an explicit scale, choose a default that depends on the format.
    if scale is None:
        if fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tt" if fmt == "cxcsec" else "tai"

    try:
        # Silence astropy (and erfa, when it is importable) warnings, e.g.
        # those issued for dates in the future.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # The astropy message is very verbose and rarely helpful here, so
        # replace it with a short one and suppress the original context.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None

179 

180 

181# ------------------------ 

182# Exported definitions -- 

183# ------------------------ 

184 

185 

class ParserYaccError(Exception):
    """Common base class for all exceptions raised by the parser."""

190 

191 

class ParseError(ParserYaccError):
    """Exception raised when a syntax error is found in an expression.

    Parameters
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.

    Attributes
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.
    posInLine : `int`
        Parsing position in current line, 0-based.
    """

    def __init__(self, expression: str, token: str, pos: int, lineno: int):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        # Must be computed after the attributes above are set.
        self.posInLine = self._posInLine()
        # The message shows a 1-based column while ``posInLine`` is 0-based.
        super().__init__(f"Syntax error at or near '{token}' (line: {lineno}, pos: {self.posInLine + 1})")

    def _posInLine(self) -> int:
        """Return the offset of ``pos`` from the start of line ``lineno``."""
        preceding = self.expression.split("\n")[: self.lineno - 1]
        # Each preceding line contributes its length plus one for the newline.
        return self.pos - sum(len(line) + 1 for line in preceding)

239 

240 

class ParserEOFError(ParserYaccError):
    """Exception raised when input ends while more input is expected."""

    def __init__(self) -> None:
        super().__init__("End of input reached while expecting further input")

246 

247 

class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    **kwargs
        Optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are grammar productions consumed by
    PLY, not ordinary documentation; they must not be reworded, and the
    methods must stay in their current order.
    """

    def __init__(self, **kwargs: Any):
        # Defaults are listed first so that caller-supplied options win.
        options = {"write_tables": 0, "debug": False, **kwargs}
        self.parser = self._parser_factory(**options)

    @staticmethod
    @functools.cache
    def _parser_factory(**kwarg: Any) -> Any:
        """Build a PLY parser for this grammar.

        Results are cached per distinct set of keyword arguments.
        """
        return yacc.yacc(module=ParserYacc, **kwarg)

    def parse(self, input: str, lexer: Any = None, debug: bool = False, tracking: bool = False) -> Node:
        """Parse input expression and return parsed tree object.

        This is a thin wrapper around the ``parse`` method of the PLY parser
        which supplies a lexer when the caller does not provide one.

        Parameters
        ----------
        input : `str`
            Expression to parse.
        lexer : `object`, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : `bool`, optional
            Set to True for debugging output.
        tracking : `bool`, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : `Node`
            Result produced by the ``input`` start rule.

        Raises
        ------
        ParserYaccError
            Raised in place of a `ParserLexError` coming from the lexer; the
            original exception is chained as the cause.
        """
        active_lexer = ParserLex.make_lexer() if lexer is None else lexer
        try:
            return self.parser.parse(input=input, lexer=active_lexer, debug=debug, tracking=tracking)
        except ParserLexError as exc:
            # Callers only need to deal with the parser exception hierarchy.
            raise ParserYaccError(str(exc)) from exc

    # Token names understood by the grammar (a copy of the lexer's list).
    tokens = ParserLex.tokens[:]

    # Operator precedence and associativity table for PLY.
    precedence = (
        ("left", "OR"),
        ("left", "AND"),
        ("nonassoc", "OVERLAPS"),  # Nonassociative operators
        ("nonassoc", "EQ", "NE"),  # Nonassociative operators
        ("nonassoc", "LT", "LE", "GT", "GE"),  # Nonassociative operators
        ("left", "ADD", "SUB"),
        ("left", "MUL", "DIV", "MOD"),
        ("right", "UPLUS", "UMINUS", "NOT"),  # unary plus and minus
    )

    # Starting rule of the grammar: a whole expression or nothing at all.
    @classmethod
    def p_input(cls, p: YaccProduction) -> None:
        """input : expr
        | empty
        """
        p[0] = p[1]

    # Empty production, yields None.
    @classmethod
    def p_empty(cls, p: YaccProduction) -> None:
        """empty :"""
        p[0] = None

    # Logical operators; operator names are normalized to upper case.
    @classmethod
    def p_expr(cls, p: YaccProduction) -> None:
        """expr : expr OR expr
        | expr AND expr
        | NOT expr
        | bool_primary
        """
        size = len(p)
        if size == 2:
            # Plain bool_primary, pass it through.
            p[0] = p[1]
        elif size == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])

    # Comparison and OVERLAPS operators.
    @classmethod
    def p_bool_primary(cls, p: YaccProduction) -> None:
        """bool_primary : bool_primary EQ predicate
        | bool_primary NE predicate
        | bool_primary LT predicate
        | bool_primary LE predicate
        | bool_primary GE predicate
        | bool_primary GT predicate
        | bool_primary OVERLAPS predicate
        | predicate
        """
        p[0] = p[1] if len(p) == 2 else BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # IN / NOT IN operators.
    @classmethod
    def p_predicate(cls, p: YaccProduction) -> None:
        """predicate : bit_expr IN LPAREN literal_or_bind_list RPAREN
        | bit_expr NOT IN LPAREN literal_or_bind_list RPAREN
        | bit_expr
        """
        size = len(p)
        if size == 7:
            # "NOT IN" form has one extra token before the list.
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        elif size == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        else:
            p[0] = p[1]

    @classmethod
    def p_simple_id(cls, p: YaccProduction) -> None:
        """simple_id : SIMPLE_IDENTIFIER"""
        p[0] = Identifier(p[1])

    @classmethod
    def p_qualified_id(cls, p: YaccProduction) -> None:
        """qualified_id : QUALIFIED_IDENTIFIER"""
        p[0] = Identifier(p[1])

    @classmethod
    def p_identifier(cls, p: YaccProduction) -> None:
        """identifier : simple_id
        | qualified_id
        """
        p[0] = p[1]

    @classmethod
    def p_literal_or_id_list(cls, p: YaccProduction) -> None:
        """literal_or_bind_list : literal_or_bind_list COMMA literal
        | literal_or_bind_list COMMA simple_id
        | literal_or_bind_list COMMA bind_name
        | literal_or_bind_list COMMA function_call
        | literal
        | simple_id
        | bind_name
        | function_call
        """
        # This list is only used by the IN() operator and is meant to hold
        # literals and bind names only (simple identifiers are still accepted
        # as bind names). UUID literals are written as a UUID() function
        # call, so function calls have to be accepted too. IsIn will check
        # that all operands are literals or binds.
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = [*p[1], p[3]]

    @classmethod
    def p_bind_name(cls, p: YaccProduction) -> None:
        """bind_name : BIND_NAME"""
        p[0] = BindName(p[1])

    # Arithmetic operators.
    @classmethod
    def p_bit_expr(cls, p: YaccProduction) -> None:
        """bit_expr : bit_expr ADD bit_expr
        | bit_expr SUB bit_expr
        | bit_expr MUL bit_expr
        | bit_expr DIV bit_expr
        | bit_expr MOD bit_expr
        | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
            return
        p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    @classmethod
    def p_simple_expr_lit(cls, p: YaccProduction) -> None:
        """simple_expr : literal
        | identifier
        | bind_name
        | function_call
        """
        p[0] = p[1]

    @classmethod
    def p_simple_expr_unary(cls, p: YaccProduction) -> None:
        """simple_expr : ADD simple_expr %prec UPLUS
        | SUB simple_expr %prec UMINUS
        """
        sign, operand = p[1], p[2]
        p[0] = UnaryOp(op=sign, operand=operand)

    @classmethod
    def p_simple_expr_paren(cls, p: YaccProduction) -> None:
        """simple_expr : LPAREN expr RPAREN"""
        p[0] = Parens(p[2])

    @classmethod
    def p_simple_expr_tuple(cls, p: YaccProduction) -> None:
        """simple_expr : LPAREN expr COMMA expr RPAREN"""
        # Only two-item tuples are supported for now; they are used for
        # time ranges.
        first, second = p[2], p[4]
        p[0] = TupleNode((first, second))

    @classmethod
    def p_literal_num(cls, p: YaccProduction) -> None:
        """literal : NUMERIC_LITERAL"""
        p[0] = NumericLiteral(p[1])

    @classmethod
    def p_literal_num_signed(cls, p: YaccProduction) -> None:
        """literal : ADD NUMERIC_LITERAL %prec UPLUS
        | SUB NUMERIC_LITERAL %prec UMINUS
        """
        # The sign is folded into the literal itself.
        sign, number = p[1], p[2]
        p[0] = NumericLiteral(sign + number)

    @classmethod
    def p_literal_str(cls, p: YaccProduction) -> None:
        """literal : STRING_LITERAL"""
        p[0] = StringLiteral(p[1])

    @classmethod
    def p_literal_time(cls, p: YaccProduction) -> None:
        """literal : TIME_LITERAL"""
        text = p[1]
        try:
            parsed = _parseTimeString(text)
        except ValueError as e:
            # Report an unparsable time string as a syntax error at the
            # position of the literal.
            raise ParseError(p.lexer.lexdata, text, p.lexpos(1), p.lineno(1)) from e
        p[0] = TimeLiteral(parsed)

    @classmethod
    def p_literal_range(cls, p: YaccProduction) -> None:
        """literal : RANGE_LITERAL"""
        # RANGE_LITERAL value is tuple of three numbers
        begin, end, step = p[1]
        p[0] = RangeLiteral(begin, end, step)

    @classmethod
    def p_function_call(cls, p: YaccProduction) -> None:
        """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
        p[0] = function_call(p[1], p[3])

    @classmethod
    def p_expr_list(cls, p: YaccProduction) -> None:
        """expr_list : expr_list COMMA expr
        | expr
        | empty
        """
        if len(p) != 2:
            p[0] = [*p[1], p[3]]
        else:
            # ``empty`` yields None, which maps to an empty argument list.
            p[0] = [] if p[1] is None else [p[1]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors. A missing token means that the input
    # ended prematurely.
    @classmethod
    def p_error(cls, p: LexToken | None) -> None:
        if p is None:
            raise ParserEOFError()
        raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)