Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py: 21%

162 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-30 02:51 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <https://www.gnu.org/licenses/>. 

27 

28# type: ignore 

29 

30"""Syntax definition for user expression parser. 

31""" 

32 

33__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"] 

34 

35# ------------------------------- 

36# Imports of standard modules -- 

37# ------------------------------- 

38import re 

39import warnings 

40 

41# ----------------------------- 

42# Imports for other modules -- 

43# ----------------------------- 

44import astropy.time 

45 

46# As of astropy 4.2, the erfa interface is shipped independently and 

47# ErfaWarning is no longer an AstropyWarning 

48try: 

49 import erfa 

50except ImportError: 

51 erfa = None 

52 

53from .exprTree import ( 

54 BinaryOp, 

55 Identifier, 

56 IsIn, 

57 NumericLiteral, 

58 Parens, 

59 RangeLiteral, 

60 StringLiteral, 

61 TimeLiteral, 

62 TupleNode, 

63 UnaryOp, 

64 function_call, 

65) 

66from .parserLex import ParserLex 

67from .ply import yacc 

68 

69# ---------------------------------- 

70# Local non-exported definitions -- 

71# ---------------------------------- 

72 

# Pattern for the text of a time literal, ``[format/]value[/scale]``.
# Which alternative of ``value`` matched (``number``, ``iso``, ``fits`` or
# ``yday``) is what lets `_parseTimeString` guess the time format when the
# string does not name one explicitly. Note that ``number`` requires a
# decimal point, so a bare integer does not match any alternative.
_re_time_str = re.compile(
    r"""
    ((?P<format>\w+)/)? # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+))) # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t) [no timezone]
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?) # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?) # yday
    )
    (/(?P<scale>\w+))? # optionally followed by "/scale"
    $
""",
    flags=re.VERBOSE | re.IGNORECASE,
)

92 

93 

def _parseTimeString(time_str):
    """Convert the text of a time literal into an astropy time object.

    The string has the form ``[format/]value[/scale]``. When the format or
    the scale is omitted it is guessed: the format from the shape of
    ``value`` and the scale from the format.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        The parsed time.

    Raises
    ------
    ValueError
        Raised if the string does not match any known layout, names an
        unknown format or scale, or is rejected by astropy for the selected
        format.
    """
    # A stringified timezone-aware datetime ends in a UTC offset. Only the
    # UTC offset "+00:00" is tolerated and dropped; any other offset is left
    # in place and makes the pattern match below fail.
    time_str = time_str.replace("+00:00", "")

    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')

    value = parsed.group("value")
    fmt = parsed.group("format")
    scale = parsed.group("scale")
    is_number = parsed.group("number") is not None

    # Explicit format and scale names are case-insensitive but must be known
    # to astropy.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    # Numeric values are handed to astropy as floats, everything else as text.
    if is_number:
        value = float(value)

    # Without an explicit format, derive it from the alternative that matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif parsed.group("iso") is not None:
            # A "T" separator (either case) between date and time means "isot".
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        elif parsed.group("fits") is not None:
            fmt = "fits"
        elif parsed.group("yday") is not None:
            fmt = "yday"
        assert fmt is not None

    # Without an explicit scale, pick the default for the format; anything
    # not listed here falls back to TAI.
    if scale is None:
        default_scales = {
            "iso": "utc",
            "isot": "utc",
            "fits": "utc",
            "yday": "utc",
            "unix": "utc",
            "cxcsec": "tt",
        }
        scale = default_scales.get(fmt, "tai")

    try:
        # Hide warnings about future dates
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # astropy's own message is very verbose and rarely helpful to the
        # user, so replace it with a short one and drop the chained context.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None

172 

173 

174# ------------------------ 

175# Exported definitions -- 

176# ------------------------ 

177 

178 

class ParserYaccError(Exception):
    """Common base class for all exceptions raised by the parser."""

183 

184 

class ParseError(ParserYaccError):
    """Exception raised for parsing errors.

    Parameters
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.

    Attributes
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.
    posInLine : `int`
        Parsing position in current line, 0-based.

    Notes
    -----
    Instances can be pickled and copied; they are re-created from the four
    constructor arguments.
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        # The message reports a 1-based column although ``posInLine`` itself
        # is 0-based.
        super().__init__(f"Syntax error at or near '{token}' (line: {lineno}, pos: {self.posInLine + 1})")

    def __reduce__(self):
        """Return the recipe used by `pickle` and `copy` to rebuild this error.

        The default exception recipe calls the class with ``self.args``,
        which here holds only the formatted message and so does not match
        the four-argument constructor. Rebuild from the original arguments
        instead.

        Returns
        -------
        recipe : `tuple`
            The class and the constructor arguments.
        """
        return (type(self), (self.expression, self.token, self.pos, self.lineno))

    def _posInLine(self):
        """Return the 0-based position of the error within its own line.

        Returns
        -------
        pos : `int`
            ``self.pos`` minus the length of all lines that precede line
            ``self.lineno``, each counted with its newline.
        """
        preceding_lines = self.expression.split("\n")[: self.lineno - 1]
        # +1 accounts for the newline that terminated each preceding line.
        return self.pos - sum(len(line) + 1 for line in preceding_lines)

233 

234 

class ParserEOFError(ParserYaccError):
    """Exception raised for EOF-during-parser.

    Notes
    -----
    Instances can be pickled and copied; they are re-created by calling the
    constructor without arguments.
    """

    def __init__(self):
        super().__init__("End of input reached while expecting further input")

    def __reduce__(self):
        """Return the recipe used by `pickle` and `copy` to rebuild this error.

        The default exception recipe calls the class with ``self.args`` (the
        fixed message), which the zero-argument constructor rejects.

        Returns
        -------
        recipe : `tuple`
            The class and an empty argument tuple.
        """
        return (type(self), ())

240 

241 

class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        Optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are the grammar productions read by
    PLY, not documentation; they must not be reworded. Explanations for the
    rules are given in comments instead.
    """

    def __init__(self, idMap=None, **kwargs):
        # Defaults come first so that caller-supplied keywords override them.
        yacc_options = {"write_tables": 0, "debug": False, **kwargs}

        self.parser = yacc.yacc(module=self, **yacc_options)
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a thin wrapper around the ``parse`` method of the parser
        built in the constructor; it only supplies a lexer when none is
        given.

        Parameters
        ----------
        input : `str`
            Expression to parse.
        lexer : `object`, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : `bool`, optional
            Set to True for debugging output.
        tracking : `bool`, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : `object`
            Value produced by the grammar actions for the whole input. The
            ``empty`` rule produces `None`, so that is the result for an
            empty expression.
        """
        if lexer is None:
            lexer = ParserLex.make_lexer()
        return self.parser.parse(input=input, lexer=lexer, debug=debug, tracking=tracking)

    # Token names come from the lexer; copied so this class owns its own list.
    tokens = ParserLex.tokens[:]

    # Operator precedence table, listed from lowest to highest precedence.
    precedence = (
        ("left", "OR"),
        ("left", "AND"),
        ("nonassoc", "OVERLAPS"),  # Nonassociative operators
        ("nonassoc", "EQ", "NE"),  # Nonassociative operators
        ("nonassoc", "LT", "LE", "GT", "GE"),  # Nonassociative operators
        ("left", "ADD", "SUB"),
        ("left", "MUL", "DIV", "MOD"),
        ("right", "UPLUS", "UMINUS", "NOT"),  # unary plus and minus
    )

    # This is the starting rule; it has to stay the first ``p_*`` method.
    def p_input(self, p):
        """input : expr
        | empty
        """
        p[0] = p[1]

    # Empty production, its value is None.
    def p_empty(self, p):
        """empty :"""
        p[0] = None

    # Logical operators; the operator name is stored upper-cased.
    def p_expr(self, p):
        """expr : expr OR expr
        | expr AND expr
        | NOT expr
        | bool_primary
        """
        nsym = len(p)
        if nsym == 3:
            # NOT expr
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        elif nsym == 4:
            # expr OR expr, expr AND expr
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        else:
            p[0] = p[1]

    # Comparison operators and OVERLAPS.
    def p_bool_primary(self, p):
        """bool_primary : bool_primary EQ predicate
        | bool_primary NE predicate
        | bool_primary LT predicate
        | bool_primary LE predicate
        | bool_primary GE predicate
        | bool_primary GT predicate
        | bool_primary OVERLAPS predicate
        | predicate
        """
        p[0] = p[1] if len(p) == 2 else BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # IN and NOT IN with a parenthesized list of values.
    def p_predicate(self, p):
        """predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
        | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
        | bit_expr
        """
        nsym = len(p)
        if nsym == 7:
            # NOT IN: the extra NOT token shifts the list to position 5.
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        elif nsym == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        else:
            p[0] = p[1]

    # Identifiers found in the substitution map are replaced by the mapped
    # node, all others become `Identifier` nodes.
    def p_identifier(self, p):
        """identifier : SIMPLE_IDENTIFIER
        | QUALIFIED_IDENTIFIER
        """
        name = p[1]
        replacement = self._idMap.get(name)
        p[0] = Identifier(name) if replacement is None else replacement

    # Comma-separated list of literals and identifiers, built as a Python list.
    def p_literal_or_id_list(self, p):
        """literal_or_id_list : literal_or_id_list COMMA literal
        | literal_or_id_list COMMA identifier
        | literal
        | identifier
        """
        if len(p) != 2:
            p[0] = p[1] + [p[3]]
        else:
            p[0] = [p[1]]

    # Binary arithmetic operators.
    def p_bit_expr(self, p):
        """bit_expr : bit_expr ADD bit_expr
        | bit_expr SUB bit_expr
        | bit_expr MUL bit_expr
        | bit_expr DIV bit_expr
        | bit_expr MOD bit_expr
        | simple_expr
        """
        p[0] = p[1] if len(p) == 2 else BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """simple_expr : literal"""
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """simple_expr : identifier"""
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """simple_expr : function_call"""
        p[0] = p[1]

    # Unary plus and minus applied to an expression.
    def p_simple_expr_unary(self, p):
        """simple_expr : ADD simple_expr %prec UPLUS
        | SUB simple_expr %prec UMINUS
        """
        sign, operand = p[1], p[2]
        p[0] = UnaryOp(op=sign, operand=operand)

    # Parentheses are kept in the tree as an explicit node.
    def p_simple_expr_paren(self, p):
        """simple_expr : LPAREN expr RPAREN"""
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """simple_expr : LPAREN expr COMMA expr RPAREN"""
        # For now we only support tuples with two items,
        # these are used for time ranges.
        first, second = p[2], p[4]
        p[0] = TupleNode((first, second))

    def p_literal_num(self, p):
        """literal : NUMERIC_LITERAL"""
        p[0] = NumericLiteral(p[1])

    # A sign directly in front of a number is folded into the literal text.
    def p_literal_num_signed(self, p):
        """literal : ADD NUMERIC_LITERAL %prec UPLUS
        | SUB NUMERIC_LITERAL %prec UMINUS
        """
        sign, number = p[1], p[2]
        p[0] = NumericLiteral(sign + number)

    def p_literal_str(self, p):
        """literal : STRING_LITERAL"""
        p[0] = StringLiteral(p[1])

    # A time literal that cannot be converted is reported as a syntax error
    # at the position of the literal.
    def p_literal_time(self, p):
        """literal : TIME_LITERAL"""
        try:
            parsed_time = _parseTimeString(p[1])
        except ValueError as exc:
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from exc
        else:
            p[0] = TimeLiteral(parsed_time)

    def p_literal_range(self, p):
        """literal : RANGE_LITERAL"""
        # RANGE_LITERAL value is tuple of three numbers
        first, last, step = p[1]
        p[0] = RangeLiteral(first, last, step)

    def p_function_call(self, p):
        """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
        func_name, arguments = p[1], p[3]
        p[0] = function_call(func_name, arguments)

    # Possibly empty comma-separated argument list, built as a Python list.
    def p_expr_list(self, p):
        """expr_list : expr_list COMMA expr
        | expr
        | empty
        """
        if len(p) != 2:
            p[0] = p[1] + [p[3]]
        elif p[1] is None:
            # The ``empty`` alternative yields None, meaning no arguments.
            p[0] = []
        else:
            p[0] = [p[1]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors; it is called with None when the input
    # ends while more tokens are expected.
    def p_error(self, p):
        if p is None:
            raise ParserEOFError()
        raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)

468 raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)