Coverage for python/lsst/daf/butler/registry/queries/exprParser/parserLex.py : 55%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
22"""Module which defines PLY lexer for user expressions parsed by pre-flight.
23"""
25__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal in the form NUM..NUM[:NUM];
this must match the t_RANGE_LITERAL docstring.
"""

# ------------------------
# Exported definitions --
# ------------------------


class ParserLexError(Exception):
    """Exception raised for lex-phase errors.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed.
    remain : str
        Remaining non-parsed part of the expression.
    pos : int
        Current parsing position, as a character offset from the beginning
        of the expression.
    lineno : int
        Current line number in the expression.
    """

    def __init__(self, expression, remain, pos, lineno):
        Exception.__init__(self, "Unexpected character at position {}".format(pos))
        self.expression = expression
        self.remain = remain
        self.pos = pos
        self.lineno = lineno
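
# A minimal sketch of how a caller might report this error (illustrative
# only, not part of the source):
#
#     try:
#         lexer.input(expression)
#         tokens = list(iter(lexer.token, None))
#     except ParserLexError as exc:
#         print("lex error at line", exc.lineno, "offset", exc.pos)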


class ParserLex:
    """Class which defines the PLY lexer.
    """

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Factory for lexers.

        Returns
        -------
        lexer : `ply.lex.Lexer`
            New lexer instance.
        """
        # make sure that the flags we need are always set
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)

        return lex.lex(object=cls(), **kw)
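
    # A minimal usage sketch for this factory (illustrative only; it relies
    # on the standard ply lexer interface input()/token()):
    #
    #     lexer = ParserLex.make_lexer()
    #     lexer.input("visit = 100")
    #     tok = lexer.token()   # IDENTIFIER token with value 'visit'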

    # literals = ""

    # Reserved words in the grammar. SQL has reserved words that we could
    # potentially make reserved in our grammar too; for now pretend we do
    # not care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )

    # List of token names.
    tokens = (
        'NUMERIC_LITERAL',
        'TIME_LITERAL',
        'STRING_LITERAL',
        'RANGE_LITERAL',
        # 'DURATION_LITERAL',
        'IDENTIFIER',
        'LPAREN', 'RPAREN',
        'EQ', 'NE', 'LT', 'LE', 'GT', 'GE',
        'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
        'COMMA'
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_EQ = '='
    t_NE = '!='
    t_LT = '<'
    t_LE = '<='
    t_GT = '>'
    t_GE = '>='
    t_ADD = r'\+'
    t_SUB = '-'
    t_MUL = r'\*'
    t_DIV = '/'
    t_MOD = '%'
    t_COMMA = ','

    # A string containing ignored characters (spaces and tabs)
    t_ignore = ' \t'

    # Define a rule so we can track line numbers
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # quoted string prefixed with 'T'
    def t_TIME_LITERAL(self, t):
        r"T'.*?'"
        # strip quotes
        t.value = t.value[2:-1]
        return t

    # quoted string
    def t_STRING_LITERAL(self, t):
        r"'.*?'"
        # strip quotes
        t.value = t.value[1:-1]
        return t
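
    # For example (illustrative): "T'2020-01-01'" lexes to a TIME_LITERAL
    # with value "2020-01-01", and "'raw'" to a STRING_LITERAL with value
    # "raw"; the non-greedy .*? keeps each match within a single quoted
    # string.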

    # range literal in format N..M[:S], spaces allowed, see _RE_RANGE
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
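
    # For example (illustrative): "1..10" lexes to t.value == (1, 10, None)
    # and "1 .. 10 : 2" to (1, 10, 2); the stride, when given, must be a
    # positive integer per _RE_RANGE.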

    # numbers are used as strings by the parser, do not convert
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)?  # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)?            # .1, .1e10, .1e+10
        """
        return t

    # identifiers may include a single dot; only ASCII is supported
    def t_IDENTIFIER(self, t):
        r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?"
        # Check for reserved words
        t.type = self.reserved.get(t.value.upper(), 'IDENTIFIER')
        return t
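
    # For example (illustrative): "visit.id" lexes as a single IDENTIFIER,
    # while "in" is promoted to the reserved IN token by the upper() lookup
    # above.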

    def t_error(self, t):
        "Error handling rule"
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)
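
# A fuller tokenization sketch (illustrative only; token types follow the
# `tokens` tuple above, and function-defined rules such as t_RANGE_LITERAL
# are tried before the shorter string rules):
#
#     lexer = ParserLex.make_lexer()
#     lexer.input("detector IN (1..4:2) AND visit > 100")
#     for tok in iter(lexer.token, None):
#         print(tok.type, tok.value)
#
# which should print IDENTIFIER, IN, LPAREN, RANGE_LITERAL, RPAREN, AND,
# IDENTIFIER, GT, NUMERIC_LITERAL token types in order.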