Coverage for python/lsst/daf/butler/registry/queries/exprParser/parserYacc.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22# type: ignore
24"""Syntax definition for user expression parser.
25"""
27__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]
29# -------------------------------
30# Imports of standard modules --
31# -------------------------------
32import re
34# -----------------------------
35# Imports for other modules --
36# -----------------------------
37import astropy.time
38from .exprTree import (BinaryOp, function_call, Identifier, IsIn, NumericLiteral, Parens,
39 RangeLiteral, StringLiteral, TimeLiteral, TupleNode, UnaryOp)
40from .ply import yacc
41from .parserLex import ParserLex
43# ----------------------------------
44# Local non-exported definitions --
45# ----------------------------------
47# The purpose of this regex is to guess time format if it is not explicitly
48# provided in the string itself
49_re_time_str = re.compile(r"""
50 ((?P<format>\w+)/)? # optionally prefixed by "format/"
51 (?P<value>
52 (?P<number>-?(\d+(\.\d*)|(\.\d+))) # floating point number
53 |
54 (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t)
55 |
56 (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?) # fits
57 |
58 (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?) # yday
59 )
60 (/(?P<scale>\w+))? # optionally followed by "/scale"
61 $
62""", re.VERBOSE | re.IGNORECASE)
def _parseTimeString(time_str):
    """Try to convert time string into astropy.Time.

    The string has the form ``[format/]value[/scale]``.  When the format or
    the scale is missing it is guessed from the shape of ``value``.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        Time object built from the value, format and scale.

    Raises
    ------
    ValueError
        Raised if input string has unexpected format, names an unknown
        format or scale, or is rejected by `astropy.time.Time`.
    """
    match = _re_time_str.match(time_str)
    if not match:
        raise ValueError(f"Time string \"{time_str}\" does not match known formats")

    value, fmt, scale = match.group("value", "format", "scale")

    # Explicit format and scale names are compared case-insensitively.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time format \"{fmt}\"")
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time scale \"{scale}\"")

    # convert number string to floating point
    is_number = match.group("number") is not None
    if is_number:
        value = float(value)

    # guess format if not given
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif match.group("iso") is not None:
            # The regex is case-insensitive, so the date/time separator may
            # be either "T" or "t".
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        elif match.group("fits") is not None:
            fmt = "fits"
        elif match.group("yday") is not None:
            fmt = "yday"
    # The regex only matches when one of the alternatives above matched.
    assert fmt is not None

    # guess scale if not given
    if scale is None:
        if fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        elif fmt == "cxcsec":
            scale = "tt"
        else:
            scale = "tai"

    try:
        value = astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # astropy makes very verbose exception that is not super-useful in
        # many context, just say we don't like it.
        raise ValueError(f"Time string \"{time_str}\" does not match format \"{fmt}\"") from None

    return value
133# ------------------------
134# Exported definitions --
135# ------------------------
class ParserYaccError(Exception):
    """Base class for exceptions generated by parser.
    """
class ParseError(ParserYaccError):
    """Exception raised for parsing errors.

    Parameters
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression

    Attributes
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression
    posInLine : int
        Parsing position in current line, 0-based
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        # The message reports a 1-based column, the attribute stays 0-based.
        msg = f"Syntax error at or near '{token}' (line: {lineno}, pos: {self.posInLine + 1})"
        super().__init__(msg)

    def _posInLine(self):
        """Return position in current line"""
        preceding = self.expression.split('\n')[:self.lineno - 1]
        # Drop the length of every earlier line, +1 for each newline.
        return self.pos - sum(len(line) + 1 for line in preceding)
class ParserEOFError(ParserYaccError):
    """Exception raised for EOF-during-parser.
    """

    def __init__(self):
        # Go through the immediate base class instead of calling
        # Exception.__init__ directly.
        super().__init__("End of input reached while expecting further input")
class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        optional keyword arguments that are passed to `yacc.yacc` constructor.
    """

    def __init__(self, idMap=None, **kwargs):

        # Defaults for yacc.yacc(); caller-supplied keywords take precedence.
        kw = dict(write_tables=0, debug=False)
        kw.update(kwargs)

        self.parser = yacc.yacc(module=self, **kw)
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : object
            Whatever the underlying parser returns for the start rule; the
            grammar below produces `None` for an empty input.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        tree = self.parser.parse(input=input, lexer=lexer, debug=debug,
                                 tracking=tracking)
        return tree

    # Copy of the lexer's token list, so the lexer's own list is not shared.
    tokens = ParserLex.tokens[:]

    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
        ('nonassoc', 'OVERLAPS'),  # Nonassociative operators
        ('nonassoc', 'EQ', 'NE'),  # Nonassociative operators
        ('nonassoc', 'LT', 'LE', 'GT', 'GE'),  # Nonassociative operators
        ('left', 'ADD', 'SUB'),
        ('left', 'MUL', 'DIV', 'MOD'),
        ('right', 'UPLUS', 'UMINUS', 'NOT'),  # unary plus and minus
    )

    # NOTE: the docstrings of the p_* methods below are grammar productions
    # that are passed to yacc.yacc() via ``module=self``; they are not free
    # text and must not be reworded.

    # this is the starting rule
    def p_input(self, p):
        """ input : expr
                  | empty
        """
        p[0] = p[1]

    def p_empty(self, p):
        """ empty :
        """
        p[0] = None

    def p_expr(self, p):
        """ expr : expr OR expr
                 | expr AND expr
                 | NOT expr
                 | bool_primary
        """
        # len(p) is the number of symbols in the matched production plus one.
        if len(p) == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    def p_bool_primary(self, p):
        """ bool_primary : bool_primary EQ predicate
                         | bool_primary NE predicate
                         | bool_primary LT predicate
                         | bool_primary LE predicate
                         | bool_primary GE predicate
                         | bool_primary GT predicate
                         | bool_primary OVERLAPS predicate
                         | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_predicate(self, p):
        """ predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
                      | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
                      | bit_expr
        """
        if len(p) == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    def p_identifier(self, p):
        """ identifier : SIMPLE_IDENTIFIER
                       | QUALIFIED_IDENTIFIER
        """
        # Substitute the identifier if the mapping has an entry for it.
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    def p_literal_or_id_list(self, p):
        """ literal_or_id_list : literal_or_id_list COMMA literal
                               | literal_or_id_list COMMA identifier
                               | literal
                               | identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    def p_bit_expr(self, p):
        """ bit_expr : bit_expr ADD bit_expr
                     | bit_expr SUB bit_expr
                     | bit_expr MUL bit_expr
                     | bit_expr DIV bit_expr
                     | bit_expr MOD bit_expr
                     | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """ simple_expr : literal
        """
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """ simple_expr : identifier
        """
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """ simple_expr : function_call
        """
        p[0] = p[1]

    def p_simple_expr_unary(self, p):
        """ simple_expr : ADD simple_expr %prec UPLUS
                        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    def p_simple_expr_paren(self, p):
        """ simple_expr : LPAREN expr RPAREN
        """
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """ simple_expr : LPAREN expr COMMA expr RPAREN
        """
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """ literal : NUMERIC_LITERAL
        """
        p[0] = NumericLiteral(p[1])

    def p_literal_num_signed(self, p):
        """ literal : ADD NUMERIC_LITERAL %prec UPLUS
                    | SUB NUMERIC_LITERAL %prec UMINUS
        """
        # The sign token is joined with the literal token before the node
        # is built.
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """ literal : STRING_LITERAL
        """
        p[0] = StringLiteral(p[1])

    def p_literal_time(self, p):
        """ literal : TIME_LITERAL
        """
        try:
            value = _parseTimeString(p[1])
        except ValueError as exc:
            # Callers still see ParseError; the ValueError, whose message
            # says what is wrong with the time string, is kept as
            # ``__cause__`` instead of being discarded.
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from exc
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """ literal : RANGE_LITERAL
        """
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    def p_function_call(self, p):
        """ function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN
        """
        p[0] = function_call(p[1], p[3])

    def p_expr_list(self, p):
        """ expr_list : expr_list COMMA expr
                      | expr
                      | empty
        """
        if len(p) == 2:
            # The "empty" production yields None, which becomes an empty
            # argument list.
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors
    def p_error(self, p):
        # ``p`` is None when the input ended while more was expected.
        if p is None:
            raise ParserEOFError()
        else:
            raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)