Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py: 53% of 69 statements
coverage.py v7.5.0, created at 2024-04-25 10:24 -0700

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# type: ignore

"""Module which defines PLY lexer for user expressions parsed by pre-flight.
"""

__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal of the form NUM..NUM[:NUM];
it is bound to t_RANGE_LITERAL via the ``lex.TOKEN`` decorator and re-applied
inside that rule to extract the named groups.
"""

# ------------------------
# Exported definitions --
# ------------------------


class ParserLexError(Exception):
    """Exception raised for lex-phase errors.

    Parameters
    ----------
    expression : `str`
        Full initial expression being parsed.
    remain : `str`
        Remaining non-parsed part of the expression.
    pos : `int`
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : `int`
        Current line number in the expression.
    """

    def __init__(self, expression, remain, pos, lineno):
        Exception.__init__(self, f"Unexpected character at position {pos}")
        self.expression = expression
        self.remain = remain
        self.pos = pos
        self.lineno = lineno


class ParserLex:
    """Class which defines PLY lexer."""

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Return lexer.

        Parameters
        ----------
        reflags : `int`, optional
            Regular expression flags.
        **kwargs
            Additional parameters for lexer.

        Returns
        -------
        `ply.lex.Lexer`
            Lexer instance.
        """
        # Make sure that the flags we need are there; t_NUMERIC_LITERAL in
        # particular relies on re.VERBOSE.
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)
        return lex.lex(object=cls(), **kw)
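
    # A minimal usage sketch (hypothetical, not part of this class):
    #
    #     lexer = ParserLex.make_lexer()
    #     lexer.input("visit = 100 AND detector IN (1..4)")
    #     for tok in lexer:
    #         print(tok.type, tok.value)
    #
    # PLY lexers are iterable, so this prints one (type, value) pair per token
    # until the input is exhausted.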

    # literals = ""

    # Reserved words in the grammar.
    # SQL has reserved words which we could potentially make reserved in our
    # grammar too; for now, pretend we don't care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        OVERLAPS="OVERLAPS",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )

    # List of token names.
    tokens = (
        "NUMERIC_LITERAL",
        "TIME_LITERAL",
        "STRING_LITERAL",
        "RANGE_LITERAL",
        # "DURATION_LITERAL",
        "QUALIFIED_IDENTIFIER",
        "SIMPLE_IDENTIFIER",
        "LPAREN",
        "RPAREN",
        "EQ",
        "NE",
        "LT",
        "LE",
        "GT",
        "GE",
        "ADD",
        "SUB",
        "MUL",
        "DIV",
        "MOD",
        "COMMA",
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens.
    t_LPAREN = r"\("
    t_RPAREN = r"\)"
    t_EQ = "="
    t_NE = "!="
    t_LT = "<"
    t_LE = "<="
    t_GT = ">"
    t_GE = ">="
    t_ADD = r"\+"
    t_SUB = "-"
    t_MUL = r"\*"
    t_DIV = "/"
    t_MOD = "%"
    t_COMMA = ","

    # A string containing ignored characters (spaces and tabs).
    t_ignore = " \t"

    # Define a rule so we can track line numbers.
    def t_newline(self, t):
        r"""\n+"""
        t.lexer.lineno += len(t.value)

    # Quoted string prefixed with 'T'.
    def t_TIME_LITERAL(self, t):
        """T'.*?'"""
        # Strip the leading T and the quotes.
        t.value = t.value[2:-1]
        return t

    # Quoted string.
    def t_STRING_LITERAL(self, t):
        """'.*?'"""
        # Strip quotes.
        t.value = t.value[1:-1]
        return t

    # Range literal in the form N..M[:S], spaces allowed, see _RE_RANGE.
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
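
    # For instance, "1..10:2" yields t.value == (1, 10, 2), and "1..10"
    # yields t.value == (1, 10, None).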

    # Numbers are used as strings by the parser, do not convert.  The
    # whitespace and inline comments in this pattern are ignored because the
    # lexer is compiled with re.VERBOSE (see make_lexer).
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)?  # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)?  # .1, .1e10, .1e+10
        """
        return t
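
    # For example, "1.5e-10" is returned with t.value still equal to the
    # string "1.5e-10"; any numeric conversion is left to the parser.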

    # Qualified identifiers have one or two dots.
    def t_QUALIFIED_IDENTIFIER(self, t):
        r"""[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}"""
        t.type = "QUALIFIED_IDENTIFIER"
        return t

    # We only support ASCII in identifier names.
    def t_SIMPLE_IDENTIFIER(self, t):
        """[a-zA-Z_][a-zA-Z0-9_]*"""
        # Check for reserved words and make sure they are upper case.
        reserved = self.reserved.get(t.value.upper())
        if reserved is not None:
            t.type = reserved
            t.value = reserved
        else:
            t.type = "SIMPLE_IDENTIFIER"
        return t
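
    # For instance, "and" in any case becomes an AND token with value "AND",
    # while "Visit" stays a SIMPLE_IDENTIFIER with its original spelling.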

    def t_error(self, t):
        """Error handling rule."""
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)
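
# A hedged error-handling sketch (hypothetical caller code): an invalid
# character raises ParserLexError, whose attributes locate the problem within
# the original expression.
#
#     lexer = ParserLex.make_lexer()
#     lexer.input("visit = @100")
#     try:
#         for tok in lexer:
#             pass
#     except ParserLexError as exc:
#         print(exc, "| remaining input:", exc.remain)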