Coverage for python / lsst / daf / butler / queries / expressions / parser / parserYacc.py: 33%
199 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-17 08:49 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-17 08:49 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <https://www.gnu.org/licenses/>.
28"""Syntax definition for user expression parser."""
30from __future__ import annotations
32__all__ = ["ParseError", "ParserEOFError", "ParserYacc", "ParserYaccError", "YaccProduction"]
34import functools
35import re
36import warnings
37from typing import Any, Protocol
39import astropy.time
41# As of astropy 4.2, the erfa interface is shipped independently and
42# ErfaWarning is no longer an AstropyWarning
43try:
44 import erfa
45except ImportError:
46 erfa = None
48from .exprTree import (
49 BinaryOp,
50 BindName,
51 Identifier,
52 IsIn,
53 Node,
54 NumericLiteral,
55 Parens,
56 RangeLiteral,
57 StringLiteral,
58 TimeLiteral,
59 TupleNode,
60 UnaryOp,
61 function_call,
62)
63from .parserLex import LexToken, ParserLex, ParserLexError
64from .ply import yacc
class YaccProduction(Protocol):
    """Structural type for the production object defined in ``ply.yacc``.

    Only the members that the grammar actions in this module touch are
    declared here; the real implementation lives in the ``ply`` package.
    """

    # Lexer that produced the tokens of the production.
    lexer: Any
    # Value associated with the production.
    value: Any

    def __getitem__(self, n: int) -> Any:
        """Return the item stored at index ``n``."""

    def __setitem__(self, n: int, v: Any) -> None:
        """Store ``v`` at index ``n``."""

    def __len__(self) -> int:
        """Return the number of items in the production."""

    def lineno(self, n: int) -> int:
        """Return the line number recorded for item ``n``."""

    def lexpos(self, n: int) -> int:
        """Return the lexing position recorded for item ``n``."""
# The purpose of this regex is to guess time format if it is not explicitly
# provided in the string itself. The accepted layout is
# ``[format/]value[/scale]``; exactly one of the named groups ``number``,
# ``iso``, ``fits`` or ``yday`` is set on a successful match and tells the
# caller which kind of value was recognised. The pattern is compiled with
# VERBOSE (whitespace and "#" comments inside the pattern are ignored) and
# IGNORECASE (so e.g. a lower-case "t" date/time separator matches too).
# NOTE(review): the ``number`` alternative requires a decimal point, so a bare
# integer such as "51544" matches none of the alternatives - confirm that this
# is intended.
_re_time_str = re.compile(
    r"""
    ((?P<format>\w+)/)?             # optionally prefixed by "format/"
    (?P<value>
        (?P<number>-?(\d+(\.\d*)|(\.\d+)))    # floating point number
        |
        (?P<iso>\d+-\d+-\d+([ T]\d+:\d+(:\d+([.]\d*)?)?)?)   # iso(t) [no timezone]
        |
        (?P<fits>[+]\d+-\d+-\d+(T\d+:\d+:\d+([.]\d*)?)?)     # fits
        |
        (?P<yday>\d+:\d+(:\d+:\d+(:\d+([.]\d*)?)?)?)     # yday
    )
    (/(?P<scale>\w+))?              # optionally followed by "/scale"
    $
""",
    re.VERBOSE | re.IGNORECASE,
)
def _parseTimeString(time_str: str) -> astropy.time.Time:
    """Convert a time string into an `astropy.time.Time` instance.

    The string has the form ``[format/]value[/scale]``. When the format or
    the scale is omitted it is guessed from the shape of the value.

    Parameters
    ----------
    time_str : `str`
        Input string.

    Returns
    -------
    time : `astropy.time.Time`
        The parsed time.

    Raises
    ------
    ValueError
        Raised if input string has unexpected format.
    """
    # A stringified timezone-aware datetime in UTC carries a "+00:00"
    # offset; strip it. Any other offset is left in place and makes the
    # regular expression below reject the string.
    time_str = time_str.replace("+00:00", "")

    parsed = _re_time_str.match(time_str)
    if parsed is None:
        raise ValueError(f'Time string "{time_str}" does not match known formats')
    groups = parsed.groupdict()

    # Explicit format and scale are validated against what astropy knows.
    fmt = groups["format"]
    if fmt is not None:
        fmt = fmt.lower()
        if fmt not in astropy.time.Time.FORMATS:
            raise ValueError(f'Time string "{time_str}" specifies unknown time format "{fmt}"')

    scale = groups["scale"]
    if scale is not None:
        scale = scale.lower()
        if scale not in astropy.time.Time.SCALES:
            raise ValueError(f'Time string "{time_str}" specifies unknown time scale "{scale}"')

    # Numeric values are handed to astropy as floats, everything else as text.
    text = groups["value"]
    is_number = groups["number"] is not None
    time_arg = float(text) if is_number else text

    # Guess the format from whichever alternative of the regex matched.
    if fmt is None:
        if is_number:
            fmt = "mjd"
        elif groups["iso"] is not None:
            has_t_separator = "T" in text or "t" in text
            fmt = "isot" if has_t_separator else "iso"
        elif groups["fits"] is not None:
            fmt = "fits"
        elif groups["yday"] is not None:
            fmt = "yday"
    assert fmt is not None

    # Guess the scale from the format.
    if scale is None:
        utc_formats = {"iso", "isot", "fits", "yday", "unix"}
        if fmt in utc_formats:
            scale = "utc"
        elif fmt == "cxcsec":
            scale = "tt"
        else:
            scale = "tai"

    try:
        # Hide warnings about future dates
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=astropy.utils.exceptions.AstropyWarning)
            if erfa is not None:
                warnings.simplefilter("ignore", category=erfa.ErfaWarning)
            result = astropy.time.Time(time_arg, format=fmt, scale=scale)
    except ValueError:
        # astropy's own message is very verbose and rarely helpful to the
        # user, so it is replaced with a short one and the chain is dropped.
        raise ValueError(f'Time string "{time_str}" does not match format "{fmt}"') from None

    return result
181# ------------------------
182# Exported definitions --
183# ------------------------
class ParserYaccError(Exception):
    """Base class for exceptions generated by parser."""


class ParseError(ParserYaccError):
    """Exception raised for parsing errors.

    Parameters
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.

    Attributes
    ----------
    expression : `str`
        Full initial expression being parsed.
    token : `str`
        Current token at parsing position.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.
    posInLine : `int`
        Parsing position in current line, 0-based.
    """

    def __init__(self, expression: str, token: str, pos: int, lineno: int):
        self.expression = expression
        self.token = token
        self.pos = pos
        self.lineno = lineno
        self.posInLine = self._posInLine()
        # The message reports a 1-based column while ``posInLine`` is 0-based.
        msg = f"Syntax error at or near '{token}' (line: {lineno}, pos: {self.posInLine + 1})"
        super().__init__(msg)

    def __reduce__(self) -> tuple[Any, ...]:
        # ``self.args`` only holds the formatted message, which does not match
        # the constructor signature; without this override copying or
        # unpickling the exception fails with TypeError.
        return (type(self), (self.expression, self.token, self.pos, self.lineno))

    def _posInLine(self) -> int:
        """Return position in current line.

        Returns
        -------
        pos : `int`
            ``self.pos`` reduced by the length of every line that precedes
            line ``self.lineno``, each counted with its trailing newline.
        """
        preceding = self.expression.split("\n")[: self.lineno - 1]
        # +1 for the newline that terminated each preceding line
        return self.pos - sum(len(line) + 1 for line in preceding)


class ParserEOFError(ParserYaccError):
    """Exception raised for EOF-during-parser."""

    def __init__(self) -> None:
        super().__init__("End of input reached while expecting further input")

    def __reduce__(self) -> tuple[Any, ...]:
        # The constructor takes no arguments while ``self.args`` holds the
        # message, so reconstruction must call the class without arguments.
        return (type(self), ())
class ParserYacc:
    """Class which defines PLY grammar.

    Based on MySQL grammar for expressions
    (https://dev.mysql.com/doc/refman/5.7/en/expressions.html).

    Parameters
    ----------
    **kwargs
        Optional keyword arguments that are passed to `yacc.yacc` constructor.

    Notes
    -----
    The docstrings of the ``p_*`` methods are not documentation: they are the
    grammar productions handed to ``yacc.yacc`` and must not be reworded.
    """

    def __init__(self, **kwargs: Any):
        # Defaults can be overridden by the caller-supplied keyword arguments.
        kw = dict(write_tables=0, debug=False)
        kw.update(kwargs)
        self.parser = self._parser_factory(**kw)

    @staticmethod
    @functools.cache
    def _parser_factory(**kwarg: Any) -> Any:
        """Make parser instance."""
        # ``functools.cache`` keys on the keyword arguments, so repeated calls
        # with the same options return the same parser object.
        return yacc.yacc(module=ParserYacc, **kwarg)

    def parse(self, input: str, lexer: Any = None, debug: bool = False, tracking: bool = False) -> Node:
        """Parse input expression and return parsed tree object.

        This is a trivial wrapper for yacc.LRParser.parse method which
        provides lexer if not given in arguments.

        Parameters
        ----------
        input : `str`
            Expression to parse.
        lexer : `object`, optional
            Lexer instance, if not given then ParserLex.make_lexer() is
            called to create one.
        debug : `bool`, optional
            Set to True for debugging output.
        tracking : `bool`, optional
            Set to True for tracking line numbers in parser.

        Returns
        -------
        tree : `Node`
            Whatever the underlying parser returns for the start rule; the
            ``empty`` production sets its value to `None`.

        Raises
        ------
        ParserYaccError
            Raised when the lexer raises `ParserLexError`; the original
            exception is chained as the cause. The error rule of the grammar
            raises the `ParseError` and `ParserEOFError` subclasses.
        """
        # make lexer
        if lexer is None:
            lexer = ParserLex.make_lexer()
        try:
            tree = self.parser.parse(input=input, lexer=lexer, debug=debug, tracking=tracking)
        except ParserLexError as exc:
            # Convert it into ParserYaccError
            raise ParserYaccError(str(exc)) from exc
        return tree

    # Copy of the token list defined by the lexer class.
    tokens = ParserLex.tokens[:]

    # Operator precedence table passed to PLY; UPLUS and UMINUS are only
    # referenced from ``%prec`` annotations in the rules below.
    precedence = (
        ("left", "OR"),
        ("left", "AND"),
        ("nonassoc", "OVERLAPS"),  # Nonassociative operators
        ("nonassoc", "EQ", "NE"),  # Nonassociative operators
        ("nonassoc", "LT", "LE", "GT", "GE"),  # Nonassociative operators
        ("left", "ADD", "SUB"),
        ("left", "MUL", "DIV", "MOD"),
        ("right", "UPLUS", "UMINUS", "NOT"),  # unary plus and minus
    )

    # this is the starting rule
    @classmethod
    def p_input(cls, p: YaccProduction) -> None:
        """input : expr
        | empty
        """
        p[0] = p[1]

    @classmethod
    def p_empty(cls, p: YaccProduction) -> None:
        """empty :"""
        p[0] = None

    @classmethod
    def p_expr(cls, p: YaccProduction) -> None:
        """expr : expr OR expr
        | expr AND expr
        | NOT expr
        | bool_primary
        """
        # len(p) tells the alternatives apart: 4 for the binary forms, 3 for
        # NOT, 2 for the pass-through. Operator text is upper-cased here.
        if len(p) == 4:
            p[0] = BinaryOp(lhs=p[1], op=p[2].upper(), rhs=p[3])
        elif len(p) == 3:
            p[0] = UnaryOp(op=p[1].upper(), operand=p[2])
        else:
            p[0] = p[1]

    @classmethod
    def p_bool_primary(cls, p: YaccProduction) -> None:
        """bool_primary : bool_primary EQ predicate
        | bool_primary NE predicate
        | bool_primary LT predicate
        | bool_primary LE predicate
        | bool_primary GE predicate
        | bool_primary GT predicate
        | bool_primary OVERLAPS predicate
        | predicate
        """
        # A lone predicate is passed through; the operator text of the binary
        # forms is used as is (not upper-cased, unlike in p_expr).
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    @classmethod
    def p_predicate(cls, p: YaccProduction) -> None:
        """predicate : bit_expr IN LPAREN literal_or_bind_list RPAREN
        | bit_expr NOT IN LPAREN literal_or_bind_list RPAREN
        | bit_expr
        """
        # 6 items: "x IN (...)"; 7 items: "x NOT IN (...)", where the extra
        # NOT token shifts the list from index 4 to index 5.
        if len(p) == 6:
            p[0] = IsIn(lhs=p[1], values=p[4])
        elif len(p) == 7:
            p[0] = IsIn(lhs=p[1], values=p[5], not_in=True)
        else:
            p[0] = p[1]

    @classmethod
    def p_simple_id(cls, p: YaccProduction) -> None:
        """simple_id : SIMPLE_IDENTIFIER"""
        p[0] = Identifier(p[1])

    @classmethod
    def p_qualified_id(cls, p: YaccProduction) -> None:
        """qualified_id : QUALIFIED_IDENTIFIER"""
        p[0] = Identifier(p[1])

    @classmethod
    def p_identifier(cls, p: YaccProduction) -> None:
        """identifier : simple_id
        | qualified_id
        """
        p[0] = p[1]

    @classmethod
    def p_literal_or_id_list(cls, p: YaccProduction) -> None:
        """literal_or_bind_list : literal_or_bind_list COMMA literal
        | literal_or_bind_list COMMA simple_id
        | literal_or_bind_list COMMA bind_name
        | literal_or_bind_list COMMA function_call
        | literal
        | simple_id
        | bind_name
        | function_call
        """
        # This expression is only used in IN() operator and it is supposed to
        # include only literals and bind names (and identifiers as we still
        # allow simple identifiers as bind names). UUID literal is implemented
        # via UUID() function call, so we need to allow function calls here
        # too. IsIn will check that all operands are literals or binds.
        if len(p) == 2:
            # First element starts a new list.
            p[0] = [p[1]]
        else:
            # Build a new list rather than mutating the one from the
            # sub-production.
            p[0] = p[1] + [p[3]]

    @classmethod
    def p_bind_name(cls, p: YaccProduction) -> None:
        """bind_name : BIND_NAME"""
        p[0] = BindName(p[1])

    @classmethod
    def p_bit_expr(cls, p: YaccProduction) -> None:
        """bit_expr : bit_expr ADD bit_expr
        | bit_expr SUB bit_expr
        | bit_expr MUL bit_expr
        | bit_expr DIV bit_expr
        | bit_expr MOD bit_expr
        | simple_expr
        """
        # A lone simple_expr is passed through; arithmetic forms become
        # BinaryOp nodes.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])

    @classmethod
    def p_simple_expr_lit(cls, p: YaccProduction) -> None:
        """simple_expr : literal
        | identifier
        | bind_name
        | function_call
        """
        p[0] = p[1]

    @classmethod
    def p_simple_expr_unary(cls, p: YaccProduction) -> None:
        """simple_expr : ADD simple_expr %prec UPLUS
        | SUB simple_expr %prec UMINUS
        """
        p[0] = UnaryOp(op=p[1], operand=p[2])

    @classmethod
    def p_simple_expr_paren(cls, p: YaccProduction) -> None:
        """simple_expr : LPAREN expr RPAREN"""
        # Parentheses are kept in the tree as an explicit Parens node.
        p[0] = Parens(p[2])

    @classmethod
    def p_simple_expr_tuple(cls, p: YaccProduction) -> None:
        """simple_expr : LPAREN expr COMMA expr RPAREN"""
        # For now we only support tuples with two items,
        # these are used for time ranges.
        p[0] = TupleNode((p[2], p[4]))

    @classmethod
    def p_literal_num(cls, p: YaccProduction) -> None:
        """literal : NUMERIC_LITERAL"""
        p[0] = NumericLiteral(p[1])

    @classmethod
    def p_literal_num_signed(cls, p: YaccProduction) -> None:
        """literal : ADD NUMERIC_LITERAL %prec UPLUS
        | SUB NUMERIC_LITERAL %prec UMINUS
        """
        # The sign token and the literal token are concatenated, so the sign
        # becomes part of the literal instead of a separate UnaryOp.
        p[0] = NumericLiteral(p[1] + p[2])

    @classmethod
    def p_literal_str(cls, p: YaccProduction) -> None:
        """literal : STRING_LITERAL"""
        p[0] = StringLiteral(p[1])

    @classmethod
    def p_literal_time(cls, p: YaccProduction) -> None:
        """literal : TIME_LITERAL"""
        # A time string that cannot be converted is reported as a syntax
        # error at the position of the offending token.
        try:
            value = _parseTimeString(p[1])
        except ValueError as e:
            raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from e
        p[0] = TimeLiteral(value)

    @classmethod
    def p_literal_range(cls, p: YaccProduction) -> None:
        """literal : RANGE_LITERAL"""
        # RANGE_LITERAL value is tuple of three numbers
        start, stop, stride = p[1]
        p[0] = RangeLiteral(start, stop, stride)

    @classmethod
    def p_function_call(cls, p: YaccProduction) -> None:
        """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
        p[0] = function_call(p[1], p[3])

    @classmethod
    def p_expr_list(cls, p: YaccProduction) -> None:
        """expr_list : expr_list COMMA expr
        | expr
        | empty
        """
        if len(p) == 2:
            # The ``empty`` production yields None, which here means a call
            # with no arguments.
            if p[1] is None:
                p[0] = []
            else:
                p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    # ---------- end of all grammar rules ----------

    # Error rule for syntax errors
    @classmethod
    def p_error(cls, p: LexToken | None) -> None:
        # No token means the input ended while more was expected; otherwise
        # report the offending token together with its position.
        if p is None:
            raise ParserEOFError()
        else:
            raise ParseError(p.lexer.lexdata, p.value, p.lexpos, p.lineno)