Coverage for python/lsst/daf/butler/registry/queries/exprParser/parserYacc.py : 22%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22# type: ignore
24"""Syntax definition for user expression parser.
25"""
27__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]
29# -------------------------------
30# Imports of standard modules --
31# -------------------------------
32import re
33import warnings
35# -----------------------------
36# Imports for other modules --
37# -----------------------------
38import astropy.time
40# As of astropy 4.2, the erfa interface is shipped independently and
41# ErfaWarning is no longer an AstropyWarning
42try:
43 import erfa
44except ImportError:
45 erfa = None
47from .exprTree import (BinaryOp, function_call, Identifier, IsIn, NumericLiteral, Parens,
48 RangeLiteral, StringLiteral, TimeLiteral, TupleNode, UnaryOp)
49from .ply import yacc
50from .parserLex import ParserLex
52# ----------------------------------
53# Local non-exported definitions --
54# ----------------------------------
# The purpose of this regex is to guess the time format when it is not
# explicitly provided in the string itself. The accepted layout is
# ``[format/]value[/scale]``; the named groups ``number``, ``iso``, ``fits``
# and ``yday`` record which alternative the value matched, which is what the
# format guessing relies on. The pattern is compiled with re.VERBOSE, so
# whitespace and "#" comments inside it are ignored by the regex engine
# (except inside character classes such as ``[ T]``).
_re_time_str = re.compile(r"""
    ((?P<format>\w+)/)?           # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+)))    # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?)   # iso(t)
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?)     # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?)         # yday
    )
    (/(?P<scale>\w+))?            # optionally followed by "/scale"
    $
""", re.VERBOSE | re.IGNORECASE)
def _parseTimeString(time_str):
    """Build an `astropy.time.Time` from its textual representation.

    The string has the general layout ``[format/]value[/scale]``. When the
    format or the scale is omitted it is guessed from the shape of the value.

    Parameters
    ----------
    time_str : `str`
        Time string to convert.

    Returns
    -------
    time : `astropy.time.Time`
        Time object built from the string.

    Raises
    ------
    ValueError
        Raised if the string does not match any known layout, names an
        unknown format or scale, or is rejected by astropy.
    """
    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f"Time string \"{time_str}\" does not match known formats")

    value = parsed.group("value")
    fmt = parsed.group("format")
    scale = parsed.group("scale")
    is_number = parsed.group("number") is not None

    # Explicit format and scale names are matched case-insensitively but have
    # to be known to astropy.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time format \"{fmt}\"")
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time scale \"{scale}\"")

    # Numeric values are handed to astropy as floats, not as strings.
    if is_number:
        value = float(value)

    # Without an explicit format, derive it from the alternative that matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif parsed.group("iso") is not None:
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        else:
            for candidate in ("fits", "yday"):
                if parsed.group(candidate) is not None:
                    fmt = candidate
                    break
    assert fmt is not None

    # Without an explicit scale, pick a default that depends on the format.
    if scale is None:
        if fmt == "cxcsec":
            scale = "tt"
        elif fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tai"

    try:
        # Silence warnings about future dates while the object is built.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # The astropy message is long and rarely helpful to the caller, so
        # replace it with a short one and drop the original context.
        raise ValueError(f"Time string \"{time_str}\" does not match format \"{fmt}\"") from None
147# ------------------------
148# Exported definitions --
149# ------------------------
class ParserYaccError(Exception):
    """Root of the exception hierarchy raised by the expression parser."""
class ParseError(ParserYaccError):
    """Exception signalling a syntax error in the parsed expression.

    Attributes
    ----------
    expression : str
        Complete expression that was being parsed.
    token : str
        Token found at the position where parsing failed.
    pos : int
        Offset of the failure from the start of the expression, in
        characters.
    lineno : int
        Line number of the failure within the expression.
    posInLine : int
        Offset of the failure within its line, 0-based.
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        # Needs the attributes above; stored 0-based, reported 1-based.
        self.posInLine = self._posInLine()
        super().__init__(
            "Syntax error at or near '{0}' (line: {1}, pos: {2})".format(
                token, lineno, self.posInLine + 1
            )
        )

    def _posInLine(self):
        """Return position in current line"""
        preceding = self.expression.split('\n')[:self.lineno - 1]
        # Every preceding line also accounts for its newline character.
        return self.pos - sum(len(line) + 1 for line in preceding)
class ParserEOFError(ParserYaccError):
    """Exception raised when the input ends while more input is expected."""

    def __init__(self):
        super().__init__("End of input reached while expecting further input")
class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are the grammar productions
    consumed by PLY, not ordinary documentation; they must not be reworded.
    Explanatory notes for those methods are therefore kept in ``#`` comments.
    """

    def __init__(self, idMap=None, **kwargs):

        # Defaults can be overridden by the caller through ``kwargs``.
        kw = {"write_tables": 0, "debug": False, **kwargs}

        self.parser = yacc.yacc(module=self, **kw)
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : object
            Whatever the underlying parser returns for the start rule: the
            tree built by the grammar actions, or `None` for an empty
            expression.

        Raises
        ------
        ParseError
            Raised by the grammar's error rule on a syntax error, and for
            time literals that cannot be converted.
        ParserEOFError
            Raised by the grammar's error rule when input ends prematurely.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        tree = self.parser.parse(input=input, lexer=lexer, debug=debug,
                                 tracking=tracking)
        return tree

    # Copy of the lexer's token list, so the lexer's own list is not shared.
    tokens = ParserLex.tokens[:]

    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
        ('nonassoc', 'OVERLAPS'),  # Nonassociative operators
        ('nonassoc', 'EQ', 'NE'),  # Nonassociative operators
        ('nonassoc', 'LT', 'LE', 'GT', 'GE'),  # Nonassociative operators
        ('left', 'ADD', 'SUB'),
        ('left', 'MUL', 'DIV', 'MOD'),
        ('right', 'UPLUS', 'UMINUS', 'NOT'),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """ input : expr
                  | empty
        """
        p[0] = p[1]

    # Empty production, its value is None.
    def p_empty(self, p):
        """ empty :
        """
        p[0] = None

    # Logical operators; the operator name is upper-cased in the tree.
    def p_expr(self, p):
        """ expr : expr OR expr
                 | expr AND expr
                 | NOT expr
                 | bool_primary
        """
        if len(p) == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    # Comparison and OVERLAPS operators; the operator text is stored as is.
    def p_bool_primary(self, p):
        """ bool_primary : bool_primary EQ predicate
                         | bool_primary NE predicate
                         | bool_primary LT predicate
                         | bool_primary LE predicate
                         | bool_primary GE predicate
                         | bool_primary GT predicate
                         | bool_primary OVERLAPS predicate
                         | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # IN / NOT IN; the alternatives are told apart by the number of symbols.
    def p_predicate(self, p):
        """ predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
                      | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
                      | bit_expr
        """
        if len(p) == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    # Identifiers found in the substitution map are replaced by the mapped
    # node, all others become Identifier nodes.
    def p_identifier(self, p):
        """ identifier : SIMPLE_IDENTIFIER
                       | QUALIFIED_IDENTIFIER
        """
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    # Comma-separated list of literals and identifiers, built as a Python
    # list.
    def p_literal_or_id_list(self, p):
        """ literal_or_id_list : literal_or_id_list COMMA literal
                               | literal_or_id_list COMMA identifier
                               | literal
                               | identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # Arithmetic binary operators.
    def p_bit_expr(self, p):
        """ bit_expr : bit_expr ADD bit_expr
                     | bit_expr SUB bit_expr
                     | bit_expr MUL bit_expr
                     | bit_expr DIV bit_expr
                     | bit_expr MOD bit_expr
                     | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """ simple_expr : literal
        """
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """ simple_expr : identifier
        """
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """ simple_expr : function_call
        """
        p[0] = p[1]

    # Unary plus and minus applied to an expression.
    def p_simple_expr_unary(self, p):
        """ simple_expr : ADD simple_expr %prec UPLUS
                        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    def p_simple_expr_paren(self, p):
        """ simple_expr : LPAREN expr RPAREN
        """
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """ simple_expr : LPAREN expr COMMA expr RPAREN
        """
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """ literal : NUMERIC_LITERAL
        """
        p[0] = NumericLiteral(p[1])

    # A signed number is folded into one literal by concatenating the sign
    # and the number.
    def p_literal_num_signed(self, p):
        """ literal : ADD NUMERIC_LITERAL %prec UPLUS
                    | SUB NUMERIC_LITERAL %prec UMINUS
        """
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """ literal : STRING_LITERAL
        """
        p[0] = StringLiteral(p[1])

    def p_literal_time(self, p):
        """ literal : TIME_LITERAL
        """
        try:
            value = _parseTimeString(p[1])
        except ValueError as exc:
            # Chain explicitly so that the reason the time string was
            # rejected stays attached to the ParseError as its cause.
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from exc
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """ literal : RANGE_LITERAL
        """
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    def p_function_call(self, p):
        """ function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN
        """
        p[0] = function_call(p[1], p[3])

    # Argument list of a function call; an empty list of arguments is
    # represented by an empty Python list.
    def p_expr_list(self, p):
        """ expr_list : expr_list COMMA expr
                      | expr
                      | empty
        """
        if len(p) == 2:
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors
    def p_error(self, p):
        # No token at all means the input ended while more was expected.
        if p is None:
            raise ParserEOFError()
        raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)