Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py: 24%

161 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2022-08-26 02:22 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22# type: ignore 

23 

24"""Syntax definition for user expression parser. 

25""" 

26 

# Names exported by ``from <module> import *``: the grammar class and the
# three exception classes defined further down in this file.
__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]

28 

29# ------------------------------- 

30# Imports of standard modules -- 

31# ------------------------------- 

32import re 

33import warnings 

34 

35# ----------------------------- 

36# Imports for other modules -- 

37# ----------------------------- 

38import astropy.time 

39 

40# As of astropy 4.2, the erfa interface is shipped independently and 

41# ErfaWarning is no longer an AstropyWarning 

42try: 

43 import erfa 

44except ImportError: 

45 erfa = None 

46 

47from .exprTree import ( 

48 BinaryOp, 

49 Identifier, 

50 IsIn, 

51 NumericLiteral, 

52 Parens, 

53 RangeLiteral, 

54 StringLiteral, 

55 TimeLiteral, 

56 TupleNode, 

57 UnaryOp, 

58 function_call, 

59) 

60from .parserLex import ParserLex 

61from .ply import yacc 

62 

63# ---------------------------------- 

64# Local non-exported definitions -- 

65# ---------------------------------- 

66 

# The purpose of this regex is to guess the time format when it is not
# explicitly provided in the string itself.
#
# Accepted shape is ``[format/]value[/scale]``. Exactly one of the named
# groups ``number``, ``iso``, ``fits`` or ``yday`` is set on a successful
# match and tells which alternative of ``value`` was recognised.
#
# Notes on the pattern:
#  - it is compiled with re.VERBOSE, so whitespace and ``#`` comments inside
#    the pattern are ignored, except for the literal space inside ``[ T]``;
#  - it is compiled with re.IGNORECASE, so a lower-case ``t`` date/time
#    separator is accepted as well;
#  - the ``number`` alternative requires a decimal point (``1.``, ``1.5`` or
#    ``.5``), so a bare run of digits does not match that branch;
#  - the trailing ``$`` means the whole string has to be consumed when the
#    pattern is applied with ``match()``.
_re_time_str = re.compile(
    r"""
    ((?P<format>\w+)/)?           # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+)))    # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?)   # iso(t)
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?)     # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?)         # yday
    )
    (/(?P<scale>\w+))?            # optionally followed by "/scale"
    $
""",
    re.VERBOSE | re.IGNORECASE,
)

86 

87 

def _parseTimeString(time_str):
    """Convert a time string into an `astropy.time.Time` instance.

    Parameters
    ----------
    time_str : `str`
        String of the form ``[format/]value[/scale]``.

    Returns
    -------
    time : `astropy.time.Time`
        Time object constructed from the value, format and scale.

    Raises
    ------
    ValueError
        Raised if the string does not match the expected pattern, if it
        names a format or scale unknown to astropy, or if astropy rejects
        the value for the selected format.

    Notes
    -----
    When the format is not given it is guessed from the shape of the value:
    a floating point number is taken as "mjd", otherwise "iso"/"isot",
    "fits" or "yday". When the scale is not given it defaults to "utc" for
    "iso", "isot", "fits", "yday" and "unix", to "tt" for "cxcsec", and to
    "tai" for everything else.
    """
    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')

    value = parsed.group("value")
    fmt = parsed.group("format")
    scale = parsed.group("scale")
    is_number = parsed.group("number") is not None

    # Explicit format and scale are case-insensitive but must be known to
    # astropy.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    # Numeric values are handed to astropy as floats, not as strings.
    if is_number:
        value = float(value)

    # Guess the format from whichever alternative of the regex matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        else:
            for group_name in ("iso", "fits", "yday"):
                if parsed.group(group_name) is not None:
                    fmt = group_name
                    break
            # ISO strings with a "T" separator need the "isot" format.
            if fmt == "iso" and ("T" in value or "t" in value):
                fmt = "isot"
        assert fmt is not None

    # Pick a default scale that depends on the format.
    if scale is None:
        if fmt == "cxcsec":
            scale = "tt"
        elif fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tai"

    try:
        # Silence astropy (and erfa, when available) warnings, e.g. the ones
        # issued for dates in the future.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            time = astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # The astropy message is very verbose and rarely helpful here, so
        # replace it with a short one and drop the original context.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None

    return time

160 

161 

162# ------------------------ 

163# Exported definitions -- 

164# ------------------------ 

165 

166 

class ParserYaccError(Exception):
    """Common base class of all exceptions raised by this parser."""

171 

172 

class ParseError(ParserYaccError):
    """Exception raised when the expression contains a syntax error.

    Attributes
    ----------
    expression : str
        Complete expression that was being parsed.
    token : str
        Token at the position where parsing failed.
    pos : int
        Position of the failure, as a character offset from the beginning
        of the expression.
    lineno : int
        Line number of the failure within the expression.
    posInLine : int
        Position of the failure within its line, 0-based.
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        # Needs the attributes above, so it has to be computed last.
        self.posInLine = self._posInLine()
        # The message reports a 1-based column, hence the "+ 1".
        super().__init__(
            "Syntax error at or near '{0}' (line: {1}, pos: {2})".format(token, lineno, self.posInLine + 1)
        )

    def _posInLine(self):
        """Return position in current line"""
        preceding = self.expression.split("\n")[: self.lineno - 1]
        # Each preceding line contributes its length plus one for the
        # newline character that terminated it.
        return self.pos - sum(len(line) + 1 for line in preceding)

209 

210 

class ParserEOFError(ParserYaccError):
    """Exception raised when input ends while more tokens are expected."""

    def __init__(self):
        super().__init__("End of input reached while expecting further input")

216 

217 

class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    PLY reads the grammar from the docstrings of the ``p_*`` methods, so
    those docstrings are part of the program and must not be edited as if
    they were documentation. Explanations for these methods are therefore
    given in ``#`` comments.
    """

    def __init__(self, idMap=None, **kwargs):

        # Defaults, any of which can be overridden by the caller via kwargs.
        kw = dict(write_tables=0, debug=False)
        kw.update(kwargs)

        self.parser = yacc.yacc(module=self, **kw)
        # An empty or missing map means no identifier is substituted.
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : object
            Value produced by the starting rule: the tree for the whole
            expression, or `None` when the expression is empty.

        Raises
        ------
        ParseError
            Raised for a syntax error or a time literal that cannot be
            converted.
        ParserEOFError
            Raised if input ends while more tokens are expected.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        tree = self.parser.parse(input=input, lexer=lexer, debug=debug, tracking=tracking)
        return tree

    # PLY needs the list of token names on the grammar module; a copy is
    # taken so the lexer's own list is not shared.
    tokens = ParserLex.tokens[:]

    # Operator precedence, listed from lowest to highest.
    precedence = (
        ("left", "OR"),
        ("left", "AND"),
        ("nonassoc", "OVERLAPS"),  # Nonassociative operators
        ("nonassoc", "EQ", "NE"),  # Nonassociative operators
        ("nonassoc", "LT", "LE", "GT", "GE"),  # Nonassociative operators
        ("left", "ADD", "SUB"),
        ("left", "MUL", "DIV", "MOD"),
        ("right", "UPLUS", "UMINUS", "NOT"),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """input : expr
        | empty
        """
        p[0] = p[1]

    def p_empty(self, p):
        """empty :"""
        # Empty production, represented by None in the result.
        p[0] = None

    def p_expr(self, p):
        """expr : expr OR expr
        | expr AND expr
        | NOT expr
        | bool_primary
        """
        # len(p) is the number of symbols on the right-hand side plus one.
        if len(p) == 4:
            # Binary OR/AND; the operator text is stored upper-cased.
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            # Unary NOT, also stored upper-cased.
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    def p_bool_primary(self, p):
        """bool_primary : bool_primary EQ predicate
        | bool_primary NE predicate
        | bool_primary LT predicate
        | bool_primary LE predicate
        | bool_primary GE predicate
        | bool_primary GT predicate
        | bool_primary OVERLAPS predicate
        | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Comparison operators are stored exactly as the lexer gave them.
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_predicate(self, p):
        """predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
        | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
        | bit_expr
        """
        if len(p) == 6:
            # "x IN (...)": the list is the 4th symbol.
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            # "x NOT IN (...)": the extra NOT shifts the list to the 5th.
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    def p_identifier(self, p):
        """identifier : SIMPLE_IDENTIFIER
        | QUALIFIED_IDENTIFIER
        """
        # Use the caller-supplied substitution when there is one, otherwise
        # keep the name as an Identifier node.
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    def p_literal_or_id_list(self, p):
        """literal_or_id_list : literal_or_id_list COMMA literal
        | literal_or_id_list COMMA identifier
        | literal
        | identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            # Build a new list instead of appending in place.
            p[0] = p[1] + [p[3]]

    def p_bit_expr(self, p):
        """bit_expr : bit_expr ADD bit_expr
        | bit_expr SUB bit_expr
        | bit_expr MUL bit_expr
        | bit_expr DIV bit_expr
        | bit_expr MOD bit_expr
        | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """simple_expr : literal"""
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """simple_expr : identifier"""
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """simple_expr : function_call"""
        p[0] = p[1]

    def p_simple_expr_unary(self, p):
        """simple_expr : ADD simple_expr %prec UPLUS
        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    def p_simple_expr_paren(self, p):
        """simple_expr : LPAREN expr RPAREN"""
        # Parentheses are kept in the tree as an explicit node.
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """simple_expr : LPAREN expr COMMA expr RPAREN"""
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """literal : NUMERIC_LITERAL"""
        p[0] = NumericLiteral(p[1])

    def p_literal_num_signed(self, p):
        """literal : ADD NUMERIC_LITERAL %prec UPLUS
        | SUB NUMERIC_LITERAL %prec UMINUS
        """
        # Sign and number are joined into a single literal value
        # (assumes both token values are strings -- TODO confirm in lexer).
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """literal : STRING_LITERAL"""
        p[0] = StringLiteral(p[1])

    def p_literal_time(self, p):
        """literal : TIME_LITERAL"""
        try:
            value = _parseTimeString(p[1])
        except ValueError as exc:
            # Report a bad time string as a syntax error at the literal's
            # position. Chain explicitly so the reason given by
            # _parseTimeString stays attached as __cause__.
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from exc
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """literal : RANGE_LITERAL"""
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    def p_function_call(self, p):
        """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
        p[0] = function_call(p[1], p[3])

    def p_expr_list(self, p):
        """expr_list : expr_list COMMA expr
        | expr
        | empty
        """
        if len(p) == 2:
            if p[1] is None:
                # "empty" alternative: no arguments at all.
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors
    def p_error(self, p):
        if p is None:
            # No offending token: input ended too early.
            raise ParserEOFError()
        else:
            raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)