Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py: 21%
162 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 10:57 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-13 10:57 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28# type: ignore
30"""Syntax definition for user expression parser.
31"""
33__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]
35# -------------------------------
36# Imports of standard modules --
37# -------------------------------
38import re
39import warnings
41# -----------------------------
42# Imports for other modules --
43# -----------------------------
44import astropy.time
46# As of astropy 4.2, the erfa interface is shipped independently and
47# ErfaWarning is no longer an AstropyWarning
48try:
49 import erfa
50except ImportError:
51 erfa = None
53from .exprTree import (
54 BinaryOp,
55 Identifier,
56 IsIn,
57 NumericLiteral,
58 Parens,
59 RangeLiteral,
60 StringLiteral,
61 TimeLiteral,
62 TupleNode,
63 UnaryOp,
64 function_call,
65)
66from .parserLex import ParserLex
67from .ply import yacc
69# ----------------------------------
70# Local non-exported definitions --
71# ----------------------------------
# The purpose of this regex is to guess time format if it is not explicitly
# provided in the string itself.
#
# A complete string has the shape "[format/]value[/scale]" and must match up
# to the end of the string ("$"). On a successful match exactly one of the
# named groups "number", "iso", "fits" or "yday" is set, which tells the
# caller which syntax the value uses. Note that the "number" alternative
# requires a decimal point, so a bare integer does not match it.
#
# The pattern is compiled with re.VERBOSE (whitespace and "#" comments inside
# the pattern are ignored, except for the literal space inside "[ T]") and
# re.IGNORECASE.
_re_time_str = re.compile(
    r"""
    ((?P<format>\w+)/)?           # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+)))    # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?)   # iso(t) [no timezone]
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?)     # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?)         # yday
    )
    (/(?P<scale>\w+))?            # optionally followed by "/scale"
    $
""",
    re.VERBOSE | re.IGNORECASE,
)
def _parseTimeString(time_str):
    """Convert the text of a time literal into an `astropy.time.Time`.

    The string is expected to look like ``[format/]value[/scale]``. When
    the format or the scale is not spelled out it is guessed from the
    syntax of the value.

    Parameters
    ----------
    time_str : `str`
        Text to convert.

    Returns
    -------
    time : `astropy.time.Time`
        Time constructed from the string.

    Raises
    ------
    ValueError
        Raised if the string does not match any known layout, if it names
        an unknown time format or scale, or if astropy rejects the value
        for the selected format.
    """
    # A stringified timezone-aware datetime in UTC carries a "+00:00"
    # offset, which is simply dropped. Any other offset is left in place
    # and makes the regex match below fail.
    time_str = time_str.replace("+00:00", "")

    found = _re_time_str.match(time_str)
    if found is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')

    parts = found.groupdict()
    value = parts["value"]
    fmt = parts["format"]
    scale = parts["scale"]
    is_number = parts["number"] is not None

    # Explicit format and scale names are case-insensitive but must be
    # known to astropy; the format is validated before the scale.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    # Numeric values are handed to astropy as floats, everything else
    # stays a string.
    if is_number:
        value = float(value)

    # Without an explicit format, pick one from the syntax that matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif parts["iso"] is not None:
            # A "T" (in either case) between date and time means "isot".
            fmt = "iso" if {"T", "t"}.isdisjoint(value) else "isot"
        else:
            fmt = next((name for name in ("fits", "yday") if parts[name] is not None), None)
    assert fmt is not None

    # Without an explicit scale, pick the default for the format.
    if scale is None:
        if fmt == "cxcsec":
            scale = "tt"
        elif fmt in ("iso", "isot", "fits", "yday", "unix"):
            scale = "utc"
        else:
            scale = "tai"

    try:
        # Hide warnings about future dates
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # The astropy message is very verbose and rarely helpful, so it is
        # replaced with a short one and the original context is dropped.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None
174# ------------------------
175# Exported definitions --
176# ------------------------
class ParserYaccError(Exception):
    """Common base class of all exceptions raised by the parser.

    Catching this type is enough to handle every error defined in this
    module.
    """
class ParseError(ParserYaccError):
    """Exception raised for parsing errors.

    Parameters
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.

    Attributes
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.
    posInLine : `int`
        Parsing position in current line, 0-based.

    Notes
    -----
    The exception message reports the position in the line 1-based, i.e.
    ``posInLine + 1``.

    Instances can be pickled and copied: `__reduce__` rebuilds the
    exception from the four constructor arguments rather than from the
    formatted message stored in ``args``.
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        msg = f"Syntax error at or near '{token}' (line: {lineno}, pos: {self.posInLine + 1})"
        super().__init__(msg)

    def __reduce__(self):
        """Support pickling and copying.

        The default implementation would re-create the exception by
        calling the class with ``self.args``, which holds only the
        formatted message and does not fit the constructor signature.
        """
        ctor_args = (self.expression, self.token, self.pos, self.lineno)
        # Pass the instance dictionary as state so that attributes attached
        # after construction survive the round trip as well.
        return (type(self), ctor_args, self.__dict__ or None)

    def _posInLine(self):
        """Return position in current line"""
        preceding = self.expression.split("\n")[: self.lineno - 1]
        # Every preceding line also consumed one newline character.
        return self.pos - sum(len(line) + 1 for line in preceding)
class ParserEOFError(ParserYaccError):
    """Exception raised for EOF-during-parser.

    The constructor takes no arguments; the message is fixed.

    Notes
    -----
    Instances can be pickled and copied: `__reduce__` rebuilds the
    exception by calling the class without arguments rather than with the
    message stored in ``args``.
    """

    def __init__(self):
        super().__init__("End of input reached while expecting further input")

    def __reduce__(self):
        """Support pickling and copying.

        The default implementation would call the class with ``self.args``
        (the message), which the zero-argument constructor rejects.
        """
        return (type(self), (), self.__dict__ or None)
class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional
        Mapping that provides substitutions for identifiers in the expression.
        The key in the map is the identifier name, the value is the
        `exprTree.Node` instance that will replace identifier in the full
        expression. If identifier does not exist in the mapping then
        `Identifier` is inserted into parse tree.
    **kwargs
        Optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are the grammar productions read
    by PLY, so they are part of the program's behavior and not ordinary
    documentation. Explanations for those methods are therefore given in
    ``#`` comments.
    """

    def __init__(self, idMap=None, **kwargs):
        """Build the PLY parser from the rules defined on this instance."""
        # Defaults for yacc.yacc(); anything passed in ``kwargs`` overrides
        # them.
        kw = dict(write_tables=0, debug=False)
        kw.update(kwargs)

        self.parser = yacc.yacc(module=self, **kw)
        # ``None`` (or any other falsy mapping) means no substitutions.
        self._idMap = idMap or {}

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : `str`
            Expression to parse.
        lexer : `object`, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : `bool`, optional
            Set to True for debugging output.
        tracking : `bool`, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : `object` or `None`
            Whatever the underlying parser returns for the ``input`` start
            rule; that rule yields `None` for an empty expression
            (presumably an `exprTree.Node` instance otherwise).
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        tree = self.parser.parse(input=input, lexer=lexer, debug=debug, tracking=tracking)
        return tree

    # Token names known to the grammar, a copy of the lexer's list.
    tokens = ParserLex.tokens[:]

    # Operator precedence and associativity table in PLY format; per PLY
    # documentation entries are listed from lowest to highest precedence.
    # UPLUS and UMINUS are only referenced through ``%prec`` in the rules
    # below.
    precedence = (
        ("left", "OR"),
        ("left", "AND"),
        ("nonassoc", "OVERLAPS"),  # Nonassociative operators
        ("nonassoc", "EQ", "NE"),  # Nonassociative operators
        ("nonassoc", "LT", "LE", "GT", "GE"),  # Nonassociative operators
        ("left", "ADD", "SUB"),
        ("left", "MUL", "DIV", "MOD"),
        ("right", "UPLUS", "UMINUS", "NOT"),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """input : expr
        | empty
        """
        # Result of the whole parse: the expression tree, or None when the
        # input is empty.
        p[0] = p[1]

    def p_empty(self, p):
        """empty :"""
        # Empty production, represented by None.
        p[0] = None

    def p_expr(self, p):
        """expr : expr OR expr
        | expr AND expr
        | NOT expr
        | bool_primary
        """
        # ``p`` includes the result slot p[0], so its length is the number
        # of symbols on the right-hand side plus one.
        if len(p) == 4:
            # Binary OR/AND, operator name is stored upper-cased.
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            # Unary NOT, operator name is stored upper-cased.
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    def p_bool_primary(self, p):
        """bool_primary : bool_primary EQ predicate
        | bool_primary NE predicate
        | bool_primary LT predicate
        | bool_primary LE predicate
        | bool_primary GE predicate
        | bool_primary GT predicate
        | bool_primary OVERLAPS predicate
        | predicate
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Comparison or OVERLAPS; the operator token value is used as is.
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_predicate(self, p):
        """predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
        | bit_expr NOT IN LPAREN literal_or_id_list RPAREN
        | bit_expr
        """
        if len(p) == 6:
            # "x IN (...)": the list of values is the fourth symbol.
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            # "x NOT IN (...)": the extra NOT shifts the list to the fifth
            # symbol.
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    def p_identifier(self, p):
        """identifier : SIMPLE_IDENTIFIER
        | QUALIFIED_IDENTIFIER
        """
        # Use the substitution from the identifier map when there is one,
        # otherwise make a regular Identifier node.
        node = self._idMap.get(p[1])
        if node is None:
            node = Identifier(p[1])
        p[0] = node

    def p_literal_or_id_list(self, p):
        """literal_or_id_list : literal_or_id_list COMMA literal
        | literal_or_id_list COMMA identifier
        | literal
        | identifier
        """
        # The value of this rule is a Python list of nodes; each recursion
        # step makes a new list with the next item appended.
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    def p_bit_expr(self, p):
        """bit_expr : bit_expr ADD bit_expr
        | bit_expr SUB bit_expr
        | bit_expr MUL bit_expr
        | bit_expr DIV bit_expr
        | bit_expr MOD bit_expr
        | simple_expr
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Binary arithmetic; the operator token value is used as is.
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """simple_expr : literal"""
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """simple_expr : identifier"""
        p[0] = p[1]

    def p_simple_expr_function_call(self, p):
        """simple_expr : function_call"""
        p[0] = p[1]

    def p_simple_expr_unary(self, p):
        """simple_expr : ADD simple_expr %prec UPLUS
        | SUB simple_expr %prec UMINUS
        """
        # Unary plus/minus applied to an arbitrary simple expression.
        p[0] = UnaryOp(op=p[1], operand=p[2])

    def p_simple_expr_paren(self, p):
        """simple_expr : LPAREN expr RPAREN"""
        # Parentheses are kept in the tree as an explicit node.
        p[0] = Parens(p[2])

    def p_simple_expr_tuple(self, p):
        """simple_expr : LPAREN expr COMMA expr RPAREN"""
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    def p_literal_num(self, p):
        """literal : NUMERIC_LITERAL"""
        p[0] = NumericLiteral(p[1])

    def p_literal_num_signed(self, p):
        """literal : ADD NUMERIC_LITERAL %prec UPLUS
        | SUB NUMERIC_LITERAL %prec UMINUS
        """
        # The sign is folded into the literal by concatenating the two
        # token values (presumably both are strings - confirm in ParserLex).
        p[0] = NumericLiteral(p[1] + p[2])

    def p_literal_str(self, p):
        """literal : STRING_LITERAL"""
        p[0] = StringLiteral(p[1])

    def p_literal_time(self, p):
        """literal : TIME_LITERAL"""
        # A time string that cannot be converted is reported as a syntax
        # error at the position of the literal, with the original ValueError
        # chained as the cause.
        try:
            value = _parseTimeString(p[1])
        except ValueError as e:
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from e
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """literal : RANGE_LITERAL"""
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    def p_function_call(self, p):
        """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
        # Function name and the list of argument nodes are passed to the
        # ``function_call`` factory imported from exprTree.
        p[0] = function_call(p[1], p[3])

    def p_expr_list(self, p):
        """expr_list : expr_list COMMA expr
        | expr
        | empty
        """
        # The value of this rule is a Python list of nodes; the ``empty``
        # alternative (None) becomes an empty list.
        if len(p) == 2:
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors
    def p_error(self, p):
        # ``p`` is the offending token, or None when the input ended while
        # more tokens were expected.
        if p is None:
            raise ParserEOFError()
        else:
            raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)