Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <https://www.gnu.org/licenses/>. 

21 

22# type: ignore 

23 

"""Syntax definition for user expression parser.

This module defines `ParserYacc`, the class holding the PLY (yacc) grammar
for user expressions, together with the exception types that the parser
raises (`ParserYaccError` and its subclasses `ParseError` and
`ParserEOFError`).
"""

# Names exported by ``from <module> import *``; helpers whose names start
# with an underscore are intentionally left out.
__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]

28 

29# ------------------------------- 

30# Imports of standard modules -- 

31# ------------------------------- 

32import re 

33 

34# ----------------------------- 

35# Imports for other modules -- 

36# ----------------------------- 

37import astropy.time 

38from .exprTree import (BinaryOp, function_call, Identifier, IsIn, NumericLiteral, Parens, 

39 RangeLiteral, StringLiteral, TimeLiteral, TupleNode, UnaryOp) 

40from .ply import yacc 

41from .parserLex import ParserLex 

42 

43# ---------------------------------- 

44# Local non-exported definitions -- 

45# ---------------------------------- 

46 

# The purpose of this regex is to guess time format if it is not explicitly
# provided in the string itself.
#
# The overall shape of an accepted string is ``[format/]value[/scale]``.
# Named groups:
#   format - optional word before the first "/"
#   value  - the time value itself; exactly one of the following sub-groups
#            is set when the pattern matches:
#       number - signed decimal number
#       iso    - "Y-M-D" optionally followed by " " or "T" and "h:m[:s[.f]]"
#       fits   - like iso but with a leading "+" and only "T" as separator
#       yday   - colon-separated "Y:D[:h:m[:s[.f]]]"
#   scale  - optional word after the last "/"
#
# The pattern is compiled with re.VERBOSE, so whitespace and "#" comments
# inside the pattern are ignored (except inside character classes such as
# ``[ T]``), and with re.IGNORECASE, so e.g. a lower-case "t" separator is
# accepted as well.
#
# NOTE(review): the "number" alternative is ``\d+(\.\d*)`` or ``\.\d+``, i.e.
# it requires a decimal point; a bare integer such as "58000" does not match
# any alternative.  Confirm that this is intended.
_re_time_str = re.compile(r"""
    ((?P<format>\w+)/)?           # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+)))    # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?)   # iso(t)
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?)     # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?)         # yday
    )
    (/(?P<scale>\w+))?            # optionally followed by "/scale"
    $
""", re.VERBOSE | re.IGNORECASE)

63 

64 

def _parseTimeString(time_str):
    """Build an `astropy.time.Time` from a textual time specification.

    The accepted layout is ``[format/]value[/scale]`` as described by the
    module-level ``_re_time_str`` pattern.  When the format or the scale is
    not spelled out in the string it is guessed from the shape of the value.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        Time constructed from the value, format and scale found in (or
        guessed for) ``time_str``.

    Raises
    ------
    ValueError
        Raised if the string does not match any known layout, if it names a
        format or scale unknown to `astropy.time.Time`, or if astropy
        rejects the value for the selected format.
    """
    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f"Time string \"{time_str}\" does not match known formats")

    value = parsed.group("value")
    fmt = parsed.group("format")
    scale = parsed.group("scale")

    # Explicitly given format/scale are normalized to lower case and checked
    # against the names that astropy knows about.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time format \"{fmt}\"")
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time scale \"{scale}\"")

    # Numeric values are handed to astropy as floats, not as strings.
    is_number = parsed.group("number") is not None
    if is_number:
        value = float(value)

    # Guess the format from whichever alternative of the pattern matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        else:
            for candidate in ("iso", "fits", "yday"):
                if parsed.group(candidate) is not None:
                    fmt = candidate
                    break
            # An ISO-like value with a "T" separator is the "isot" flavour.
            if fmt == "iso" and any(mark in value for mark in ("T", "t")):
                fmt = "isot"
        assert fmt is not None

    # Guess the scale from the format.
    if scale is None:
        if fmt == "cxcsec":
            scale = "tt"
        elif fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tai"

    try:
        return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # astropy makes very verbose exception that is not super-useful in
        # many context, just say we don't like it.
        raise ValueError(f"Time string \"{time_str}\" does not match format \"{fmt}\"") from None

132 

133# ------------------------ 

134# Exported definitions -- 

135# ------------------------ 

136 

137 

class ParserYaccError(Exception):
    """Common ancestor of every exception raised by the parser.

    Catch this type to handle any parser failure regardless of its
    specific kind.
    """

142 

143 

class ParseError(ParserYaccError):
    """Exception raised when the expression contains a syntax error.

    Parameters
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression

    Attributes
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression
    posInLine : int
        Parsing position in current line, 0-based
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        # Must be computed after the attributes above are assigned.
        self.posInLine = self._posInLine()
        # The message reports a 1-based column, hence the "+ 1".
        template = "Syntax error at or near '{0}' (line: {1}, pos: {2})"
        super().__init__(template.format(token, lineno, self.posInLine + 1))

    def _posInLine(self):
        """Return position in current line"""
        earlier_lines = self.expression.split('\n')[:self.lineno - 1]
        # Each preceding line consumes its own length plus one character
        # for the newline that terminated it.
        consumed = sum(len(text) + 1 for text in earlier_lines)
        return self.pos - consumed

180 

181 

class ParserEOFError(ParserYaccError):
    """Exception raised when input ends while the parser needs more of it.
    """

    def __init__(self):
        message = "End of input reached while expecting further input"
        super().__init__(message)

189 

190 

class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are grammar productions that are
    read by PLY, they are not ordinary documentation and must not be
    reworded.  The order in which the ``p_*`` methods are defined is
    significant too (the first one defines the starting rule), so
    explanatory text for the rules lives in ``#`` comments.
    """

    def __init__(self, idMap=None, **kwargs):
        # Defaults come first so that caller-supplied keyword arguments
        # override them.
        kw = {"write_tables": 0, "debug": False, **kwargs}

        self.parser = yacc.yacc(module=self, **kw)
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : object
            Value produced by the starting rule: the tree built for the
            expression, or `None` when the input matches the empty rule.

        Raises
        ------
        ParseError
            Raised by the error rule when a token cannot be parsed, and
            when a time literal cannot be converted to a time value.
        ParserEOFError
            Raised by the error rule when it is called without a token.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        return self.parser.parse(input=input, lexer=lexer, debug=debug,
                                 tracking=tracking)

    # Copy of the lexer's token list, so that this class has its own list.
    tokens = ParserLex.tokens[:]

    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
        ('nonassoc', 'OVERLAPS'),  # Nonassociative operators
        ('nonassoc', 'EQ', 'NE'),  # Nonassociative operators
        ('nonassoc', 'LT', 'LE', 'GT', 'GE'),  # Nonassociative operators
        ('left', 'ADD', 'SUB'),
        ('left', 'MUL', 'DIV', 'MOD'),
        ('right', 'UPLUS', 'UMINUS', 'NOT'),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """ input : expr
                  | empty
        """
        p[0] = p[1]

    # Empty production, its value is None.
    def p_empty(self, p):
        """ empty :
        """
        p[0] = None

    # Boolean combinations: OR/AND make a BinaryOp, NOT makes a UnaryOp, in
    # both cases the operator text is upper-cased.  A lone bool_primary is
    # passed through unchanged.
    def p_expr(self, p):
        """ expr : expr OR expr
                 | expr AND expr
                 | NOT expr
                 | bool_primary
        """
        if len(p) == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    # Comparisons and OVERLAPS make a BinaryOp, a lone predicate is passed
    # through unchanged.
    # NOTE(review): unlike p_expr the operator text is not upper-cased
    # here; confirm that the lexer delivers OVERLAPS in the expected case.
    def p_bool_primary(self, p):
        """ bool_primary : bool_primary EQ predicate
                         | bool_primary NE predicate
                         | bool_primary LT predicate
                         | bool_primary LE predicate
                         | bool_primary GE predicate
                         | bool_primary GT predicate
                         | bool_primary OVERLAPS predicate
                         | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # "IN (...)" and "NOT IN (...)" make an IsIn node; the alternatives are
    # told apart by the number of symbols in the production.
    def p_predicate(self, p):
        """ predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
                      | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
                      | bit_expr
        """
        if len(p) == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    # An identifier is replaced by the node registered for it in the idMap
    # given to the constructor, otherwise it becomes an Identifier node.
    def p_identifier(self, p):
        """ identifier : SIMPLE_IDENTIFIER
                       | QUALIFIED_IDENTIFIER
        """
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    # Comma-separated list of literals and identifiers, collected into a
    # Python list.
    def p_literal_or_id_list(self, p):
        """ literal_or_id_list : literal_or_id_list COMMA literal
                               | literal_or_id_list COMMA identifier
                               | literal
                               | identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # Arithmetic operators make a BinaryOp, a lone simple_expr is passed
    # through unchanged.
    def p_bit_expr(self, p):
        """ bit_expr : bit_expr ADD bit_expr
                     | bit_expr SUB bit_expr
                     | bit_expr MUL bit_expr
                     | bit_expr DIV bit_expr
                     | bit_expr MOD bit_expr
                     | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """ simple_expr : literal
        """
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """ simple_expr : identifier
        """
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """ simple_expr : function_call
        """
        p[0] = p[1]

    # Unary plus/minus applied to a simple expression makes a UnaryOp.
    def p_simple_expr_unary(self, p):
        """ simple_expr : ADD simple_expr %prec UPLUS
                        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    # Parenthesized expression is wrapped in a Parens node.
    def p_simple_expr_paren(self, p):
        """ simple_expr : LPAREN expr RPAREN
        """
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """ simple_expr : LPAREN expr COMMA expr RPAREN
        """
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """ literal : NUMERIC_LITERAL
        """
        p[0] = NumericLiteral(p[1])

    # A signed number is kept as a single literal, the sign token is
    # prepended to the literal's text.
    def p_literal_num_signed(self, p):
        """ literal : ADD NUMERIC_LITERAL %prec UPLUS
                    | SUB NUMERIC_LITERAL %prec UMINUS
        """
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """ literal : STRING_LITERAL
        """
        p[0] = StringLiteral(p[1])

    def p_literal_time(self, p):
        """ literal : TIME_LITERAL
        """
        try:
            value = _parseTimeString(p[1])
        except ValueError as exc:
            # Report a bad time string as a syntax error at the literal's
            # position; chain explicitly so that the detailed reason stays
            # available as the cause of the ParseError.
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from exc
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """ literal : RANGE_LITERAL
        """
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    # Function call: name followed by a parenthesized argument list.
    def p_function_call(self, p):
        """ function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN
        """
        p[0] = function_call(p[1], p[3])

    # Argument list collected into a Python list; the empty production
    # gives an empty list.
    def p_expr_list(self, p):
        """ expr_list : expr_list COMMA expr
                      | expr
                      | empty
        """
        if len(p) == 2:
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors
    def p_error(self, p):
        # No token means that input ended while more was expected.
        if p is None:
            raise ParserEOFError()
        raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)