Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py: 53% of 69 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# type: ignore
24"""Module which defines PLY lexer for user expressions parsed by pre-flight.
25"""

__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal in the form NUM..NUM[:NUM];
it is attached to t_RANGE_LITERAL below via the `lex.TOKEN` decorator.
"""

# ------------------------
# Exported definitions --
# ------------------------


class ParserLexError(Exception):
    """Exception raised for lex-phase errors.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed.
    remain : str
        Remaining non-parsed part of the expression.
    pos : int
        Current parsing position, offset from beginning of expression in
        characters.
    lineno : int
        Current line number in the expression.
    """

    def __init__(self, expression, remain, pos, lineno):
        Exception.__init__(self, "Unexpected character at position {}".format(pos))
        self.expression = expression
        self.remain = remain
        self.pos = pos
        self.lineno = lineno
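
# Illustrative sketch (not from the source): callers would typically catch
# this exception to report the failing position back to the user, e.g.:
#
#     try:
#         lexer.input(expression)
#         tokens = list(lexer)
#     except ParserLexError as exc:
#         print(f"line {exc.lineno}, pos {exc.pos}: cannot parse {exc.remain!r}")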


class ParserLex:
    """Class which defines a PLY lexer."""

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Factory for lexers.

        Returns
        -------
        lexer : `ply.lex.Lexer`
            Lexer instance.
        """
        # make sure the flags that we need are set
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)

        return lex.lex(object=cls(), **kw)
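
    # Illustrative usage (assumption, not from the source): the returned
    # lexer follows the standard PLY interface, e.g.:
    #
    #     lexer = ParserLex.make_lexer()
    #     lexer.input("visit = 12345")
    #     tok = lexer.token()  # first token, or None when input is exhausted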

    # literals = ""

    # Reserved words in the grammar. SQL has reserved words which we could
    # potentially make reserved in our grammar too; for now try to pretend
    # we don't care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        OVERLAPS="OVERLAPS",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )

    # List of token names.
    tokens = (
        "NUMERIC_LITERAL",
        "TIME_LITERAL",
        "STRING_LITERAL",
        "RANGE_LITERAL",
        # 'DURATION_LITERAL',
        "QUALIFIED_IDENTIFIER",
        "SIMPLE_IDENTIFIER",
        "LPAREN",
        "RPAREN",
        "EQ",
        "NE",
        "LT",
        "LE",
        "GT",
        "GE",
        "ADD",
        "SUB",
        "MUL",
        "DIV",
        "MOD",
        "COMMA",
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens
    t_LPAREN = r"\("
    t_RPAREN = r"\)"
    t_EQ = "="
    t_NE = "!="
    t_LT = "<"
    t_LE = "<="
    t_GT = ">"
    t_GE = ">="
    t_ADD = r"\+"
    t_SUB = "-"
    t_MUL = r"\*"
    t_DIV = "/"
    t_MOD = "%"
    t_COMMA = ","

    # A string containing ignored characters (spaces and tabs)
    t_ignore = " \t"

    # Define a rule so we can track line numbers
    def t_newline(self, t):
        r"\n+"
        t.lexer.lineno += len(t.value)

    # quoted string prefixed with 'T'
    def t_TIME_LITERAL(self, t):
        r"T'.*?'"
        # strip quotes
        t.value = t.value[2:-1]
        return t

    # quoted string
    def t_STRING_LITERAL(self, t):
        r"'.*?'"
        # strip quotes
        t.value = t.value[1:-1]
        return t
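
    # Illustrative examples (not from the source): T'2020-01-01' yields the
    # token value "2020-01-01", and 'HSC' yields "HSC" (quotes stripped).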

    # range literal in format N..M[:S], spaces allowed, see _RE_RANGE
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
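
    # Illustrative examples (not from the source): "1..10:2" produces the
    # token value (1, 10, 2), and "1..10" produces (1, 10, None).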

    # numbers are used as strings by the parser, do not convert
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)?  # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)?  # .1, .1e10, .1e+10
        """
        return t

    # qualified identifiers have one or two dots
    def t_QUALIFIED_IDENTIFIER(self, t):
        r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}"
        t.type = "QUALIFIED_IDENTIFIER"
        return t

    # we only support ASCII in identifier names
    def t_SIMPLE_IDENTIFIER(self, t):
        r"[a-zA-Z_][a-zA-Z0-9_]*"
        # Check for reserved words and make sure they are upper case
        reserved = self.reserved.get(t.value.upper())
        if reserved is not None:
            t.type = reserved
            t.value = reserved
        else:
            t.type = "SIMPLE_IDENTIFIER"
        return t

    def t_error(self, t):
        "Error handling rule"
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)
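

if __name__ == "__main__":
    # Minimal illustrative sketch (not part of the library): tokenize a sample
    # expression and print the resulting tokens. Because of the relative
    # import above, run this as a module (python -m ...) rather than as a
    # script.
    lexer = ParserLex.make_lexer()
    lexer.input("visit.id IN (1..10:2) AND instrument = 'HSC'")
    for tok in lexer:
        print(tok.type, tok.value)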