Coverage for python/lsst/daf/butler/registry/queries/exprParser/parserYacc.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Syntax definition for user expression parser.
23"""
25__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]
27# -------------------------------
28# Imports of standard modules --
29# -------------------------------
30import re
32# -----------------------------
33# Imports for other modules --
34# -----------------------------
35import astropy.time
36from .exprTree import (BinaryOp, Identifier, IsIn, NumericLiteral, Parens,
37 RangeLiteral, StringLiteral, TimeLiteral, UnaryOp)
38from .ply import yacc
39from .parserLex import ParserLex
41# ----------------------------------
42# Local non-exported definitions --
43# ----------------------------------
# Pattern that splits a time literal into an optional "format/" prefix, the
# time value itself and an optional "/scale" suffix.  Which of the named
# alternatives inside "value" matched (number, iso, fits or yday) is what
# allows the time format to be guessed when the string does not spell it
# out.  Note that the "number" alternative only matches text that contains
# a decimal point.
_re_time_str = re.compile(
    r"""
    ((?P<format>\w+)/)?           # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+)))    # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?)   # iso(t)
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?)     # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?)         # yday
    )
    (/(?P<scale>\w+))?            # optionally followed by "/scale"
    $
""",
    flags=re.VERBOSE | re.IGNORECASE,
)
def _parseTimeString(time_str):
    """Build an `astropy.time.Time` from its textual representation.

    The string may carry an explicit ``format/`` prefix and/or ``/scale``
    suffix; whatever is missing is guessed from the shape of the value.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        Time object constructed from the string.

    Raises
    ------
    ValueError
        Raised if input string has unexpected format, names a format or
        scale unknown to astropy, or is rejected by astropy for the
        selected format.
    """
    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f"Time string \"{time_str}\" does not match known formats")

    value = parsed.group("value")
    fmt = parsed.group("format")
    scale = parsed.group("scale")
    is_number = parsed.group("number") is not None

    # Explicitly given format and scale names are compared case-insensitively
    # against what astropy knows about.
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time format \"{fmt}\"")
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f"Time string \"{time_str}\" specifies unknown time scale \"{scale}\"")

    # Numeric values are handed to astropy as floats, not strings.
    if is_number:
        value = float(value)

    # Without an explicit format, pick one from the regex alternative
    # that matched the value.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif parsed.group("iso") is not None:
            fmt = "isot" if ("T" in value or "t" in value) else "iso"
        elif parsed.group("fits") is not None:
            fmt = "fits"
        elif parsed.group("yday") is not None:
            fmt = "yday"
        assert fmt is not None

    # Without an explicit scale, fall back to a per-format default;
    # anything not listed here uses TAI.
    if scale is None:
        default_scales = dict.fromkeys(("iso", "isot", "fits", "yday", "unix"), "utc")
        default_scales["cxcsec"] = "tt"
        scale = default_scales.get(fmt, "tai")

    try:
        return astropy.time.Time(value, format=fmt, scale=scale)
    except ValueError:
        # astropy makes very verbose exception that is not super-useful in
        # many context, just say we don't like it.
        raise ValueError(f"Time string \"{time_str}\" does not match format \"{fmt}\"") from None
131# ------------------------
132# Exported definitions --
133# ------------------------
class ParserYaccError(Exception):
    """Common base for all exceptions raised by the expression parser."""
class ParseError(ParserYaccError):
    """Exception raised when the expression contains a syntax error.

    The exception message reports the offending token, the line number and
    the 1-based position within that line.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed
    token : str
        Current token at parsing position
    pos : int
        Current parsing position, offset from beginning of expression in
        characters
    lineno : int
        Current line number in the expression
    posInLine : int
        Parsing position in current line, 0-based
    """

    def __init__(self, expression, token, pos, lineno):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        template = "Syntax error at or near '{0}' (line: {1}, pos: {2})"
        # The message shows a 1-based column, the attribute stays 0-based.
        super().__init__(template.format(token, lineno, self.posInLine + 1))

    def _posInLine(self):
        """Return position in current line"""
        preceding = self.expression.split('\n')[:self.lineno - 1]
        # Each preceding line accounts for its characters plus one newline.
        consumed = sum(len(text) + 1 for text in preceding)
        return self.pos - consumed
class ParserEOFError(ParserYaccError):
    """Exception raised when input ends while the parser expects more."""

    def __init__(self):
        message = "End of input reached while expecting further input"
        super().__init__(message)
class ParserYacc:
    """Class which defines PLY grammar.

    The docstrings of the ``p_*`` methods hold the grammar productions and
    must not be edited as ordinary documentation; explanatory notes for
    those methods are therefore given as comments.

    Parameters
    ----------
    **kwargs
        Optional keyword arguments passed to ``yacc.yacc()``. They are
        applied on top of the defaults ``write_tables=0`` and
        ``debug=False``.
    """

    def __init__(self, **kwargs):

        options = {"write_tables": 0, "debug": False, **kwargs}

        self.parser = yacc.yacc(module=self, **options)

    def parse(self, input, lexer=None, debug=False, tracking=False):
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : str
            Expression to parse
        lexer : object, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : bool, optional
            Set to True for debugging output.
        tracking : bool, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : object
            Whatever the parser returns for the expression.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        return self.parser.parse(input=input, lexer=lexer, debug=debug,
                                 tracking=tracking)

    tokens = ParserLex.tokens[:]

    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
        ('nonassoc', 'EQ', 'NE'),  # Nonassociative operators
        ('nonassoc', 'LT', 'LE', 'GT', 'GE'),  # Nonassociative operators
        ('left', 'ADD', 'SUB'),
        ('left', 'MUL', 'DIV', 'MOD'),
        ('right', 'UPLUS', 'UMINUS', 'NOT'),  # unary plus and minus
    )

    # this is the starting rule
    def p_input(self, p):
        """ input : expr
                  | empty
        """
        p[0] = p[1]

    # empty input produces None
    def p_empty(self, p):
        """ empty :
        """
        p[0] = None

    # boolean combinations; operator text is upper-cased in the tree
    def p_expr(self, p):
        """ expr : expr OR expr
                 | expr AND expr
                 | NOT expr
                 | bool_primary
        """
        size = len(p)
        if size == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif size == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    # comparisons
    def p_bool_primary(self, p):
        """ bool_primary : bool_primary EQ predicate
                         | bool_primary NE predicate
                         | bool_primary LT predicate
                         | bool_primary LE predicate
                         | bool_primary GE predicate
                         | bool_primary GT predicate
                         | predicate
        """
        p[0] = p[1] if len(p) == 2 else BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    # IN / NOT IN membership tests
    def p_predicate(self, p):
        """ predicate : bit_expr IN LPAREN literal_list RPAREN
                      | bit_expr NOT IN LPAREN literal_list RPAREN
                      | bit_expr
        """
        size = len(p)
        if size == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif size == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    # comma-separated literals are collected into a Python list
    def p_literal_list(self, p):
        """ literal_list : literal_list COMMA literal
                         | literal
        """
        p[0] = [p[1]] if len(p) == 2 else [*p[1], p[3]]

    # arithmetic
    def p_bit_expr(self, p):
        """ bit_expr : bit_expr ADD bit_expr
                     | bit_expr SUB bit_expr
                     | bit_expr MUL bit_expr
                     | bit_expr DIV bit_expr
                     | bit_expr MOD bit_expr
                     | simple_expr
        """
        p[0] = p[1] if len(p) == 2 else BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    def p_simple_expr_lit(self, p):
        """ simple_expr : literal
        """
        p[0] = p[1]

    def p_simple_expr_id(self, p):
        """ simple_expr : IDENTIFIER
        """
        name = p[1]
        p[0] = Identifier(name)

    def p_simple_expr_unary(self, p):
        """ simple_expr : ADD simple_expr %prec UPLUS
                        | SUB simple_expr %prec UMINUS
        """
        sign, operand = p[1], p[2]
        p[0] = UnaryOp(op=sign, operand=operand)

    def p_simple_expr_paren(self, p):
        """ simple_expr : LPAREN expr RPAREN
        """
        inner = p[2]
        p[0] = Parens(inner)

    def p_literal_num(self, p):
        """ literal : NUMERIC_LITERAL
        """
        p[0] = NumericLiteral(p[1])

    # the sign token is glued onto the literal text
    def p_literal_num_signed(self, p):
        """ literal : ADD NUMERIC_LITERAL %prec UPLUS
                    | SUB NUMERIC_LITERAL %prec UMINUS
        """
        sign, number = p[1], p[2]
        p[0] = NumericLiteral(sign + number)

    def p_literal_str(self, p):
        """ literal : STRING_LITERAL
        """
        p[0] = StringLiteral(p[1])

    # a time string that cannot be converted is reported as a syntax error
    def p_literal_time(self, p):
        """ literal : TIME_LITERAL
        """
        text = p[1]
        try:
            value = _parseTimeString(text)
        except ValueError:
            raise ParseError(p.lexer.lexdata, text, p.lexpos(1), p.lineno(1))
        p[0] = TimeLiteral(value)

    def p_literal_range(self, p):
        """ literal : RANGE_LITERAL
        """
        # RANGE_LITERAL value is tuple of three numbers
        first, last, step = p[1]
        p[0] = RangeLiteral(first, last, step)

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors; a missing token means that input ended
    # prematurely.
    def p_error(self, p):
        if p is None:
            raise ParserEOFError()
        raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)