# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

22"""Module which defines PLY lexer for user expressions parsed by pre-flight. 

23""" 

__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal of the form NUM..NUM[:NUM];
the same expression serves as the token pattern for t_RANGE_LITERAL (via
the lex.TOKEN decorator) and to extract the matched groups.
"""

# ------------------------
# Exported definitions --
# ------------------------


class ParserLexError(Exception):
    """Exception raised for lex-phase errors.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed.
    remain : str
        Remaining non-parsed part of the expression.
    pos : int
        Current parsing position, offset from the beginning of the
        expression in characters.
    lineno : int
        Current line number in the expression.
    """

    def __init__(self, expression, remain, pos, lineno):
        super().__init__(f"Unexpected character at position {pos}")
        self.expression = expression
        self.remain = remain
        self.pos = pos
        self.lineno = lineno


class ParserLex:
    """Class which defines a PLY lexer."""

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Factory for lexers.

        Returns
        -------
        `ply.lex.Lexer` instance.
        """
        # make sure that the flags we need are always set
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)
        return lex.lex(object=cls(), **kw)
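
    # A minimal usage sketch (standard PLY lexer interface; the expression
    # string is an invented example, not from the original source):
    #
    #     lexer = ParserLex.make_lexer()
    #     lexer.input("visit > 100 AND band = 'r'")
    #     for tok in lexer:
    #         print(tok.type, tok.value)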

    # literals = ""

    # reserved words in the grammar.
    # SQL has reserved words which we could potentially make reserved in our
    # grammar too; for now we pretend we do not care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )

    # List of token names.
    tokens = (
        'NUMERIC_LITERAL',
        'TIME_LITERAL',
        'STRING_LITERAL',
        'RANGE_LITERAL',
        # 'DURATION_LITERAL',
        'IDENTIFIER',
        'LPAREN', 'RPAREN',
        'EQ', 'NE', 'LT', 'LE', 'GT', 'GE',
        'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
        'COMMA',
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_EQ = '='
    t_NE = '!='
    t_LT = '<'
    t_LE = '<='
    t_GT = '>'
    t_GE = '>='
    t_ADD = r'\+'
    t_SUB = '-'
    t_MUL = r'\*'
    t_DIV = '/'
    t_MOD = '%'
    t_COMMA = ','

    # A string containing ignored characters (spaces and tabs)
    t_ignore = ' \t'

    # Define a rule so we can track line numbers
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # quoted string prefixed with 'T'
    def t_TIME_LITERAL(self, t):
        r"T'.*?'"
        # strip the leading T and the quotes
        t.value = t.value[2:-1]
        return t
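
    # For example, the input T'2020-01-01' produces the token value
    # "2020-01-01" (an invented sample; the lexer itself does not validate
    # the time format).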

    # quoted string
    def t_STRING_LITERAL(self, t):
        r"'.*?'"
        # strip quotes
        t.value = t.value[1:-1]
        return t

    # range literal in format N..M[:S], spaces allowed, see _RE_RANGE
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
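
    # For example, "1..10:2" yields t.value == (1, 10, 2), while "1..10"
    # yields (1, 10, None); both sample inputs are invented for
    # illustration.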

    # numbers are passed to the parser as strings; do not convert
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)?   # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)?             # .1, .1e10, .1e+10
        """
        return t

    # identifiers can have a dot, and we only support ASCII
    def t_IDENTIFIER(self, t):
        r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)?"
        # check for reserved words
        t.type = self.reserved.get(t.value.upper(), 'IDENTIFIER')
        return t
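
    # For example, "visit.id" lexes as a single IDENTIFIER token, while
    # "and" is re-typed to the reserved AND token by the upper-cased lookup
    # above (sample inputs invented for illustration).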

    def t_error(self, t):
        "Error handling rule"
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)
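

# Sketch of error handling at the call site; "a ? b" is an invented input
# containing a character no rule matches:
#
#     try:
#         lexer = ParserLex.make_lexer()
#         lexer.input("a ? b")
#         tokens = list(lexer)
#     except ParserLexError as exc:
#         print(exc, "remaining:", exc.remain)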