Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py: 21%
161 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-26 02:02 -0700
« prev ^ index » next coverage.py v6.5.0, created at 2022-10-26 02:02 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22# type: ignore
24"""Syntax definition for user expression parser.
25"""
27__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]
29# -------------------------------
30# Imports of standard modules --
31# -------------------------------
32import re
33import warnings
35# -----------------------------
36# Imports for other modules --
37# -----------------------------
38import astropy.time
40# As of astropy 4.2, the erfa interface is shipped independently and
41# ErfaWarning is no longer an AstropyWarning
42try:
43 import erfa
44except ImportError:
45 erfa = None
47from .exprTree import (
48 BinaryOp,
49 Identifier,
50 IsIn,
51 NumericLiteral,
52 Parens,
53 RangeLiteral,
54 StringLiteral,
55 TimeLiteral,
56 TupleNode,
57 UnaryOp,
58 function_call,
59)
60from .parserLex import ParserLex
61from .ply import yacc
63# ----------------------------------
64# Local non-exported definitions --
65# ----------------------------------
67# The purpose of this regex is to guess time format if it is not explicitly
68# provided in the string itself
69_re_time_str = re.compile(
70 r"""
71 ((?P<format>\w+)/)? # optionally prefixed by "format/"
72 (?P<value>
73 (?P<number>-?(\d+(\.\d*)|(\.\d+))) # floating point number
74 |
75 (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?) # iso(t)
76 |
77 (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?) # fits
78 |
79 (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?) # yday
80 )
81 (/(?P<scale>\w+))? # optionally followed by "/scale"
82 $
83""",
84 re.VERBOSE | re.IGNORECASE,
85)
def _parseTimeString(time_str):
    """Convert a time string into an `astropy.time.Time` instance.

    The string is matched against ``_re_time_str``, i.e. it has the general
    form ``[format/]value[/scale]``.  Format and scale names are compared
    case-insensitively.  When either is missing it is guessed from the shape
    of the value.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        Time object built from the value with the given or guessed format
        and scale.

    Raises
    ------
    ValueError
        Raised if the string does not match the expected pattern, names an
        unknown format or scale, or is rejected by astropy for the selected
        format.
    """
    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')

    raw_value = parsed.group("value")
    fmt = parsed.group("format")
    scale = parsed.group("scale")
    is_number = parsed.group("number") is not None

    # Validate explicitly given names against what astropy knows about.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    # Numeric values are handed to astropy as floats, everything else as str.
    value = float(raw_value) if is_number else raw_value

    # Without an explicit format, pick one from the alternative that matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif parsed.group("iso") is not None:
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        elif parsed.group("fits") is not None:
            fmt = "fits"
        elif parsed.group("yday") is not None:
            fmt = "yday"
    assert fmt is not None

    # Without an explicit scale, choose a default based on the format.
    if scale is None:
        if fmt == "cxcsec":
            scale = "tt"
        elif fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tai"

    try:
        # Hide warnings about future dates
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # astropy makes very verbose exception that is not super-useful in
        # many context, just say we don't like it.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None
162# ------------------------
163# Exported definitions --
164# ------------------------
class ParserYaccError(Exception):
    """Common base class for every exception raised by the parser."""
class ParseError(ParserYaccError):
    """Exception raised when the expression contains a syntax error.

    Parameters
    ----------
    expression : str
        Full initial expression being parsed.
    token : str
        Current token at parsing position.
    pos : int
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : int
        Current line number in the expression.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression
    posInLine : int
        Parsing position in current line, 0-based
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        # The message reports a 1-based column, the attribute stays 0-based.
        template = "Syntax error at or near '{0}' (line: {1}, pos: {2})"
        super().__init__(template.format(token, lineno, self.posInLine + 1))

    def _posInLine(self):
        """Return position in current line"""
        preceding = self.expression.split("\n")[: self.lineno - 1]
        # Each preceding line contributes its length plus one for the newline.
        return self.pos - sum(len(line) + 1 for line in preceding)
class ParserEOFError(ParserYaccError):
    """Exception raised when input ends while the parser expects more."""

    def __init__(self):
        super().__init__("End of input reached while expecting further input")
class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        Optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are grammar productions consumed
    by PLY, not ordinary documentation; they must not be reworded.
    Explanatory notes for those methods are therefore kept in ``#`` comments.
    """

    def __init__(self, idMap=None, **kwargs):

        # Defaults for the yacc constructor; caller-supplied kwargs win.
        kw = dict(write_tables=0, debug=False)
        kw.update(kwargs)

        self.parser = yacc.yacc(module=self, **kw)
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : object
            Whatever the underlying parser returns for the start rule; per
            the grammar below this is the expression tree, or `None` for
            empty input.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        tree = self.parser.parse(input=input, lexer=lexer, debug=debug, tracking=tracking)
        return tree

    # Copy of the lexer token list, so the lexer's own list is not shared.
    tokens = ParserLex.tokens[:]

    precedence = (
        ("left", "OR"),
        ("left", "AND"),
        ("nonassoc", "OVERLAPS"),  # Nonassociative operators
        ("nonassoc", "EQ", "NE"),  # Nonassociative operators
        ("nonassoc", "LT", "LE", "GT", "GE"),  # Nonassociative operators
        ("left", "ADD", "SUB"),
        ("left", "MUL", "DIV", "MOD"),
        ("right", "UPLUS", "UMINUS", "NOT"),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """input : expr
        | empty
        """
        p[0] = p[1]

    def p_empty(self, p):
        """empty :"""
        p[0] = None

    # Logical operators: 4 symbols means binary OR/AND, 3 means unary NOT,
    # otherwise the value of bool_primary is passed through.
    def p_expr(self, p):
        """expr : expr OR expr
        | expr AND expr
        | NOT expr
        | bool_primary
        """
        if len(p) == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    # Comparison and OVERLAPS operators.
    def p_bool_primary(self, p):
        """bool_primary : bool_primary EQ predicate
        | bool_primary NE predicate
        | bool_primary LT predicate
        | bool_primary LE predicate
        | bool_primary GE predicate
        | bool_primary GT predicate
        | bool_primary OVERLAPS predicate
        | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # IN / NOT IN; the production length tells the two forms apart.
    def p_predicate(self, p):
        """predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
        | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
        | bit_expr
        """
        if len(p) == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    # Identifiers found in the substitution map are replaced by the mapped
    # node, all others become Identifier nodes.
    def p_identifier(self, p):
        """identifier : SIMPLE_IDENTIFIER
        | QUALIFIED_IDENTIFIER
        """
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    # Builds a Python list of the items, left to right.
    def p_literal_or_id_list(self, p):
        """literal_or_id_list : literal_or_id_list COMMA literal
        | literal_or_id_list COMMA identifier
        | literal
        | identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # Arithmetic operators.
    def p_bit_expr(self, p):
        """bit_expr : bit_expr ADD bit_expr
        | bit_expr SUB bit_expr
        | bit_expr MUL bit_expr
        | bit_expr DIV bit_expr
        | bit_expr MOD bit_expr
        | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """simple_expr : literal"""
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """simple_expr : identifier"""
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """simple_expr : function_call"""
        p[0] = p[1]

    def p_simple_expr_unary(self, p):
        """simple_expr : ADD simple_expr %prec UPLUS
        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    def p_simple_expr_paren(self, p):
        """simple_expr : LPAREN expr RPAREN"""
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """simple_expr : LPAREN expr COMMA expr RPAREN"""
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """literal : NUMERIC_LITERAL"""
        p[0] = NumericLiteral(p[1])

    # The sign token and the number token are concatenated into one literal.
    def p_literal_num_signed(self, p):
        """literal : ADD NUMERIC_LITERAL %prec UPLUS
        | SUB NUMERIC_LITERAL %prec UMINUS
        """
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """literal : STRING_LITERAL"""
        p[0] = StringLiteral(p[1])

    def p_literal_time(self, p):
        """literal : TIME_LITERAL"""
        try:
            value = _parseTimeString(p[1])
        except ValueError as exc:
            # Report a bad time string as a syntax error at the literal's
            # position; chain explicitly so the reason stays attached.
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from exc
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """literal : RANGE_LITERAL"""
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    def p_function_call(self, p):
        """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
        p[0] = function_call(p[1], p[3])

    # An empty argument list produces [], a single expr a one-item list.
    def p_expr_list(self, p):
        """expr_list : expr_list COMMA expr
        | expr
        | empty
        """
        if len(p) == 2:
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors.  ``p`` is None when the input ended
    # unexpectedly, otherwise it is the offending token.
    def p_error(self, p):
        if p is None:
            raise ParserEOFError()
        else:
            raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)