Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py: 21%

161 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22# type: ignore 

23 

24"""Syntax definition for user expression parser. 

25""" 

26 

27__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"] 

28 

29# ------------------------------- 

30# Imports of standard modules -- 

31# ------------------------------- 

32import re 

33import warnings 

34 

35# ----------------------------- 

36# Imports for other modules -- 

37# ----------------------------- 

38import astropy.time 

39 

40# As of astropy 4.2, the erfa interface is shipped independently and 

41# ErfaWarning is no longer an AstropyWarning 

42try: 

43 import erfa 

44except ImportError: 

45 erfa = None 

46 

47from .exprTree import (BinaryOp, function_call, Identifier, IsIn, NumericLiteral, Parens, 

48 RangeLiteral, StringLiteral, TimeLiteral, TupleNode, UnaryOp) 

49from .ply import yacc 

50from .parserLex import ParserLex 

51 

52# ---------------------------------- 

53# Local non-exported definitions -- 

54# ---------------------------------- 

55 

56# The purpose of this regex is to guess time format if it is not explicitly 

57# provided in the string itself 

58_re_time_str = re.compile(r""" 

59 ((?P<format>\w+)/)? # optionally prefixed by "format/" 

60 (?P<value> 

61 (?P<number>-?(\d+(\.\d*)|(\.\d+))) # floating point number 

62 | 

63 (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t) 

64 | 

65 (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?) # fits 

66 | 

67 (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?) # yday 

68 ) 

69 (/(?P<scale>\w+))? # optionally followed by "/scale" 

70 $ 

71""", re.VERBOSE | re.IGNORECASE) 

72 

73 

def _parseTimeString(time_str):
    """Convert a time string into an `astropy.time.Time` instance.

    Parameters
    ----------
    time_str : `str`
        Input string, optionally of the form ``format/value/scale``.

    Returns
    -------
    time : `astropy.time.Time`

    Raises
    ------
    ValueError
        Raised if input string has unexpected format.
    """
    m = _re_time_str.match(time_str)
    if m is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')

    value = m.group("value")
    fmt = m.group("format")
    scale = m.group("scale")

    # Validate explicit format/scale against what astropy supports.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    is_number = m.group("number") is not None
    if is_number:
        # Numeric values are passed to astropy as floats.
        value = float(value)

    # Guess format from which regex alternative matched, if not given.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif m.group("iso") is not None:
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        elif m.group("fits") is not None:
            fmt = "fits"
        elif m.group("yday") is not None:
            fmt = "yday"
        assert fmt is not None

    # Guess scale from the format, if not given.
    if scale is None:
        if fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tt" if fmt == "cxcsec" else "tai"

    try:
        with warnings.catch_warnings():
            # Hide warnings about future dates.
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # astropy makes very verbose exception that is not super-useful in
        # many contexts, just say we don't like it.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None

146 

147# ------------------------ 

148# Exported definitions -- 

149# ------------------------ 

150 

151 

class ParserYaccError(Exception):
    """Base class for all exceptions raised by the expression parser."""


class ParseError(ParserYaccError):
    """Exception raised for parsing errors.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression
    posInLine : int
        Parsing position in current line, 0-based
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        message = "Syntax error at or near '{0}' (line: {1}, pos: {2})".format(
            token, lineno, self.posInLine + 1
        )
        super().__init__(message)

    def _posInLine(self):
        """Return 0-based position of ``self.pos`` within its line."""
        # Subtract the length (plus newline) of every line preceding the
        # current one from the absolute character offset.
        offset = self.pos
        for line in self.expression.split("\n")[: self.lineno - 1]:
            offset -= len(line) + 1
        return offset


class ParserEOFError(ParserYaccError):
    """Exception raised when end of input is reached mid-parse."""

    def __init__(self):
        super().__init__("End of input reached while expecting further input")

203 

204 

class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods below are not documentation --
    PLY reads them as the grammar production rules.  Do not reword them.
    """

    def __init__(self, idMap=None, **kwargs):

        # Default to in-memory parser tables and no debug output;
        # caller-supplied kwargs override these defaults.
        kw = dict(write_tables=0, debug=False)
        kw.update(kwargs)

        # Build the parser from the p_* rules defined on this class.
        self.parser = yacc.yacc(module=self, **kw)
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        tree = self.parser.parse(input=input, lexer=lexer, debug=debug,
                                 tracking=tracking)
        return tree

    # Token names are shared with the lexer; copy the list so PLY can use it.
    tokens = ParserLex.tokens[:]

    # Operator precedence, lowest first; mirrors MySQL expression precedence.
    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
        ('nonassoc', 'OVERLAPS'),  # Nonassociative operators
        ('nonassoc', 'EQ', 'NE'),  # Nonassociative operators
        ('nonassoc', 'LT', 'LE', 'GT', 'GE'),  # Nonassociative operators
        ('left', 'ADD', 'SUB'),
        ('left', 'MUL', 'DIV', 'MOD'),
        ('right', 'UPLUS', 'UMINUS', 'NOT'),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """ input : expr
                  | empty
        """
        p[0] = p[1]

    # Empty production; yields None so an empty expression parses to None.
    def p_empty(self, p):
        """ empty :
        """
        p[0] = None

    # Boolean combinators: len(p) == 4 is a binary OR/AND, len(p) == 3 is
    # unary NOT, otherwise pass the bool_primary through unchanged.
    def p_expr(self, p):
        """ expr : expr OR expr
                 | expr AND expr
                 | NOT expr
                 | bool_primary
        """
        if len(p) == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    # Comparison operators between predicates; single predicate passes through.
    def p_bool_primary(self, p):
        """ bool_primary : bool_primary EQ predicate
                         | bool_primary NE predicate
                         | bool_primary LT predicate
                         | bool_primary LE predicate
                         | bool_primary GE predicate
                         | bool_primary GT predicate
                         | bool_primary OVERLAPS predicate
                         | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # IN / NOT IN membership test; len(p) distinguishes the two forms
    # (NOT IN has one extra token).
    def p_predicate(self, p):
        """ predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
                      | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
                      | bit_expr
        """
        if len(p) == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    # Identifiers are substituted from idMap when present; otherwise an
    # Identifier node is created.
    def p_identifier(self, p):
        """ identifier : SIMPLE_IDENTIFIER
                       | QUALIFIED_IDENTIFIER
        """
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    # Comma-separated list of literals and/or identifiers, built as a
    # Python list (left recursion appends to the accumulated list).
    def p_literal_or_id_list(self, p):
        """ literal_or_id_list : literal_or_id_list COMMA literal
                               | literal_or_id_list COMMA identifier
                               | literal
                               | identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # Arithmetic operators; precedence/associativity come from the
    # `precedence` table above.
    def p_bit_expr(self, p):
        """ bit_expr : bit_expr ADD bit_expr
                     | bit_expr SUB bit_expr
                     | bit_expr MUL bit_expr
                     | bit_expr DIV bit_expr
                     | bit_expr MOD bit_expr
                     | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """ simple_expr : literal
        """
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """ simple_expr : identifier
        """
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """ simple_expr : function_call
        """
        p[0] = p[1]

    # Unary +/- applied to a general expression (see p_literal_num_signed
    # for the literal-only fast path).
    def p_simple_expr_unary(self, p):
        """ simple_expr : ADD simple_expr %prec UPLUS
                        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    def p_simple_expr_paren(self, p):
        """ simple_expr : LPAREN expr RPAREN
        """
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """ simple_expr : LPAREN expr COMMA expr RPAREN
        """
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """ literal : NUMERIC_LITERAL
        """
        # NumericLiteral keeps the value as a string.
        p[0] = NumericLiteral(p[1])

    # Signed numeric literal: the sign is folded into the literal string
    # rather than producing a UnaryOp node.
    def p_literal_num_signed(self, p):
        """ literal : ADD NUMERIC_LITERAL %prec UPLUS
                    | SUB NUMERIC_LITERAL %prec UMINUS
        """
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """ literal : STRING_LITERAL
        """
        p[0] = StringLiteral(p[1])

    # Time literal: converted eagerly to astropy Time; a malformed time
    # string becomes a ParseError at the literal's position.
    def p_literal_time(self, p):
        """ literal : TIME_LITERAL
        """
        try:
            value = _parseTimeString(p[1])
        except ValueError:
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1))
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """ literal : RANGE_LITERAL
        """
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    def p_function_call(self, p):
        """ function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN
        """
        p[0] = function_call(p[1], p[3])

    # Comma-separated argument list; `empty` yields [] for zero-arg calls.
    def p_expr_list(self, p):
        """ expr_list : expr_list COMMA expr
                      | expr
                      | empty
        """
        if len(p) == 2:
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors; p is None when input ended unexpectedly.
    def p_error(self, p):
        if p is None:
            raise ParserEOFError()
        else:
            raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)