Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py: 51%

69 statements  


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# type: ignore

"""Module which defines the PLY lexer for user expressions parsed by
pre-flight.
"""

__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal in the form NUM..NUM[:NUM];
this must stay in sync with the t_RANGE_LITERAL docstring.
"""

# ------------------------
# Exported definitions --
# ------------------------


class ParserLexError(Exception):
    """Exception raised for lex-phase errors.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed.
    remain : str
        Remaining non-parsed part of the expression.
    pos : int
        Current parsing position, offset from the beginning of the
        expression in characters.
    lineno : int
        Current line number in the expression.
    """

    def __init__(self, expression, remain, pos, lineno):
        Exception.__init__(self, f"Unexpected character at position {pos}")
        self.expression = expression
        self.remain = remain
        self.pos = pos
        self.lineno = lineno


class ParserLex:
    """Class which defines the PLY lexer."""

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Return a new lexer instance.

        Returns
        -------
        lexer : `ply.lex.Lexer`
            Lexer configured with the token rules defined on this class.
        """
        # Make sure the flags that we need are always set.
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)

        return lex.lex(object=cls(), **kw)
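
    # Note (illustrative, not part of the original module): ``lex.lex``
    # builds the lexer from the ``t_*`` attributes of the instance passed
    # as ``object``.  re.VERBOSE is needed because the t_NUMERIC_LITERAL
    # docstring below contains whitespace and inline comments, and
    # re.IGNORECASE makes all token patterns case-insensitive.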

    # literals = ""

    # Reserved words in the grammar.  SQL has reserved words which we could
    # potentially make reserved in our grammar too; for now try to pretend
    # we don't care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        OVERLAPS="OVERLAPS",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )

    # List of token names.
    tokens = (
        "NUMERIC_LITERAL",
        "TIME_LITERAL",
        "STRING_LITERAL",
        "RANGE_LITERAL",
        # 'DURATION_LITERAL',
        "QUALIFIED_IDENTIFIER",
        "SIMPLE_IDENTIFIER",
        "LPAREN",
        "RPAREN",
        "EQ",
        "NE",
        "LT",
        "LE",
        "GT",
        "GE",
        "ADD",
        "SUB",
        "MUL",
        "DIV",
        "MOD",
        "COMMA",
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens.
    t_LPAREN = r"\("
    t_RPAREN = r"\)"
    t_EQ = "="
    t_NE = "!="
    t_LT = "<"
    t_LE = "<="
    t_GT = ">"
    t_GE = ">="
    t_ADD = r"\+"
    t_SUB = "-"
    t_MUL = r"\*"
    t_DIV = "/"
    t_MOD = "%"
    t_COMMA = ","

    # A string containing ignored characters (spaces and tabs).
    t_ignore = " \t"

    # Define a rule so we can track line numbers.
    def t_newline(self, t):
        r"""\n+"""
        t.lexer.lineno += len(t.value)

    # A quoted string prefixed with 'T'.
    def t_TIME_LITERAL(self, t):
        """T'.*?'"""
        # Strip the T prefix and the quotes.
        t.value = t.value[2:-1]
        return t

    # A quoted string.
    def t_STRING_LITERAL(self, t):
        """'.*?'"""
        # Strip the quotes.
        t.value = t.value[1:-1]
        return t
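
    # Example (illustrative): the input T'2020-01-01' produces a
    # TIME_LITERAL token with value "2020-01-01", and 'abc' produces a
    # STRING_LITERAL token with value "abc".  Both rules only strip the
    # delimiters; interpreting the contents is left to the parser.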

    # Range literal in the format N..M[:S], spaces allowed; see _RE_RANGE.
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
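
    # Example (illustrative): "1..10" yields the token value (1, 10, None)
    # and "1 .. 10:2" yields (1, 10, 2).  The re.match call above cannot
    # fail, because t.value was matched by the same _RE_RANGE pattern.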

    # Numbers are used as strings by the parser; do not convert.
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)? # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)? # .1, .1e10, .1e+10
        """
        return t

    # Qualified identifiers have one or two dots.
    def t_QUALIFIED_IDENTIFIER(self, t):
        r"""[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}"""
        t.type = "QUALIFIED_IDENTIFIER"
        return t
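
    # Example (illustrative): "visit.id" and "instrument.visit.id" lex as
    # QUALIFIED_IDENTIFIER; a bare name such as "visit" has no dots and
    # falls through to t_SIMPLE_IDENTIFIER below.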

    # We only support ASCII in identifier names.
    def t_SIMPLE_IDENTIFIER(self, t):
        """[a-zA-Z_][a-zA-Z0-9_]*"""
        # Check for reserved words and make sure they are upper case.
        reserved = self.reserved.get(t.value.upper())
        if reserved is not None:
            t.type = reserved
            t.value = reserved
        else:
            t.type = "SIMPLE_IDENTIFIER"
        return t
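
    # Example (illustrative): because the reserved-word lookup is
    # case-insensitive, "and", "And", and "AND" all become an AND token
    # with value "AND", while "andx" remains a SIMPLE_IDENTIFIER.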

    def t_error(self, t):
        """Error handling rule."""
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)

217 raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)