Coverage for python/lsst/daf/butler/registry/queries/exprParser/parserLex.py: 49%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# type: ignore

"""Module which defines the PLY lexer for user expressions parsed by
pre-flight.
"""

__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal of the form NUM..NUM[:NUM];
it is applied to t_RANGE_LITERAL via the @lex.TOKEN decorator and must stay
in sync with that rule.
"""
48# ------------------------
49# Exported definitions --
50# ------------------------
53class ParserLexError(Exception):
54 """Exception raised for lex-phase errors.
56 Attributes
57 ----------
58 expression : str
59 Full initial expression being parsed
60 remain : str
61 Remaining non-parsed part of the expression
62 pos : int
63 Current parsing position, offset from beginning of expression in
64 characters
65 lineno : int
66 Current line number in the expression
67 """
69 def __init__(self, expression, remain, pos, lineno):
70 Exception.__init__(self, "Unexpected character at position {}".format(pos))
71 self.expression = expression
72 self.remain = remain
73 self.pos = pos
74 self.lineno = lineno
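
# A minimal usage sketch (illustrative, not part of this module): client code
# typically catches ParserLexError to report where lexing failed; the "@"
# below is just an example of a character the lexer rejects.
#
#     lexer = ParserLex.make_lexer()
#     lexer.input("visit = @")
#     try:
#         list(iter(lexer.token, None))
#     except ParserLexError as exc:
#         print(f"line {exc.lineno}, position {exc.pos}: cannot lex {exc.remain!r}")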


class ParserLex:
    """Class which defines the PLY lexer for user expressions."""

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Factory for lexers.

        Returns
        -------
        lexer : `ply.lex.Lexer`
            Lexer instance configured with the token rules defined here.
        """
        # Make sure the flags we rely on are always present; VERBOSE is
        # required by the multi-line NUMERIC_LITERAL pattern.
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)
        return lex.lex(object=cls(), **kw)
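
    # A minimal usage sketch (the expression is illustrative):
    #
    #     lexer = ParserLex.make_lexer()
    #     lexer.input("visit = 42 AND detector IN (1..4)")
    #     for tok in iter(lexer.token, None):
    #         print(tok.type, tok.value)
    #
    # which yields SIMPLE_IDENTIFIER, EQ, NUMERIC_LITERAL, AND,
    # SIMPLE_IDENTIFIER, IN, LPAREN, RANGE_LITERAL, RPAREN tokens in order.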

    # literals = ""

    # Reserved words in the grammar. SQL has reserved words which we could
    # potentially reserve in our grammar too; for now we pretend we do not
    # care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        OVERLAPS="OVERLAPS",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )

    # List of token names.
    tokens = (
        'NUMERIC_LITERAL',
        'TIME_LITERAL',
        'STRING_LITERAL',
        'RANGE_LITERAL',
        # 'DURATION_LITERAL',
        'QUALIFIED_IDENTIFIER',
        'SIMPLE_IDENTIFIER',
        'LPAREN', 'RPAREN',
        'EQ', 'NE', 'LT', 'LE', 'GT', 'GE',
        'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
        'COMMA'
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens.
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_EQ = '='
    t_NE = '!='
    t_LT = '<'
    t_LE = '<='
    t_GT = '>'
    t_GE = '>='
    t_ADD = r'\+'
    t_SUB = '-'
    t_MUL = r'\*'
    t_DIV = '/'
    t_MOD = '%'
    t_COMMA = ','

    # A string containing ignored characters (spaces and tabs).
    t_ignore = ' \t'

    # Define a rule so we can track line numbers.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Quoted string prefixed with 'T'.
    def t_TIME_LITERAL(self, t):
        r"T'.*?'"
        # Strip the leading T and the quotes.
        t.value = t.value[2:-1]
        return t
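
    # For example (illustrative), the input T'2020-01-01 12:00' yields a
    # TIME_LITERAL token whose value is the string "2020-01-01 12:00".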

    # Quoted string.
    def t_STRING_LITERAL(self, t):
        r"'.*?'"
        # Strip the quotes.
        t.value = t.value[1:-1]
        return t

    # Range literal in the format N..M[:S], spaces allowed, see _RE_RANGE.
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
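
    # For example (illustrative), "1..10:2" produces the value (1, 10, 2)
    # and "1..10" produces (1, 10, None).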

    # Numbers are passed to the parser as strings; do not convert them here.
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)? # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)? # .1, .1e10, .1e+10
        """
        return t
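
    # For example (illustrative), "1.5e-3" is emitted with the string value
    # "1.5e-3"; any numeric conversion happens downstream in the parser.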

    # Qualified identifiers have one or two dots.
    def t_QUALIFIED_IDENTIFIER(self, t):
        r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}"
        t.type = 'QUALIFIED_IDENTIFIER'
        return t
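
    # For example (illustrative), "visit.id" and "instrument.detector.name"
    # both match; identifiers with more than two dots are not matched as a
    # single token.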

    # We only support ASCII in identifier names.
    def t_SIMPLE_IDENTIFIER(self, t):
        r"[a-zA-Z_][a-zA-Z0-9_]*"
        # Check for reserved words; normalize them to upper case.
        reserved = self.reserved.get(t.value.upper())
        if reserved is not None:
            t.type = reserved
            t.value = reserved
        else:
            t.type = "SIMPLE_IDENTIFIER"
        return t
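
    # For example (illustrative), the input "and" is normalized to the
    # reserved token AND with value "AND", while "visits" remains a
    # SIMPLE_IDENTIFIER with its original value.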

    def t_error(self, t):
        "Error handling rule"
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)