# Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py: 53% (69 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

# type: ignore

"""Module defining the PLY lexer for user expressions parsed by pre-flight."""

__all__ = ["ParserLex", "ParserLexError"]

# -------------------------------
# Imports of standard modules --
# -------------------------------
import re

# -----------------------------
# Imports for other modules --
# -----------------------------
from .ply import lex

# ----------------------------------
# Local non-exported definitions --
# ----------------------------------

_RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?"
"""Regular expression to match a range literal in the form NUM..NUM[:NUM];
it is attached to `t_RANGE_LITERAL` below via the `@lex.TOKEN` decorator.
"""

# ------------------------
# Exported definitions --
# ------------------------


class ParserLexError(Exception):
    """Exception raised for lex-phase errors.

    Attributes
    ----------
    expression : str
        Full initial expression being parsed.
    remain : str
        Remaining non-parsed part of the expression.
    pos : int
        Current parsing position, offset from the beginning of the
        expression in characters.
    lineno : int
        Current line number in the expression.
    """

    def __init__(self, expression, remain, pos, lineno):
        Exception.__init__(self, f"Unexpected character at position {pos}")
        self.expression = expression
        self.remain = remain
        self.pos = pos
        self.lineno = lineno


class ParserLex:
    """Class which defines the PLY lexer."""

    @classmethod
    def make_lexer(cls, reflags=0, **kwargs):
        """Return a lexer.

        Returns
        -------
        lexer : `ply.lex.Lexer`
            The lexer instance.
        """
        # Make sure the flags that we need are set.
        kw = dict(reflags=reflags | re.IGNORECASE | re.VERBOSE)
        kw.update(kwargs)

        return lex.lex(object=cls(), **kw)
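
    # For illustration: flags passed via ``reflags`` are OR'ed with the
    # IGNORECASE | VERBOSE flags that the token regexes below rely on, and
    # any extra keyword arguments (e.g. ``debug=1``, a standard ``lex.lex``
    # option) are passed through unchanged:
    #
    #     lexer = ParserLex.make_lexer()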

    # literals = ""

    # Reserved words in the grammar. SQL has reserved words that we could
    # potentially make reserved in our grammar too; for now, pretend we do
    # not care about SQL.
    reserved = dict(
        # IS="IS",
        IN="IN",
        # NULL="NULL",
        OR="OR",
        AND="AND",
        NOT="NOT",
        OVERLAPS="OVERLAPS",
        # BETWEEN="BETWEEN",
        # LIKE="LIKE",
        # ESCAPE="ESCAPE",
        # REGEXP="REGEXP"
    )
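    # Note that t_SIMPLE_IDENTIFIER below looks words up via
    # ``t.value.upper()``, so "in", "In", and "IN" all produce an IN token
    # with the value normalized to "IN".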

    # List of token names.
    tokens = (
        "NUMERIC_LITERAL",
        "TIME_LITERAL",
        "STRING_LITERAL",
        "RANGE_LITERAL",
        # "DURATION_LITERAL",
        "QUALIFIED_IDENTIFIER",
        "SIMPLE_IDENTIFIER",
        "LPAREN",
        "RPAREN",
        "EQ",
        "NE",
        "LT",
        "LE",
        "GT",
        "GE",
        "ADD",
        "SUB",
        "MUL",
        "DIV",
        "MOD",
        "COMMA",
    ) + tuple(reserved.values())

    # Regular expression rules for simple tokens.
    t_LPAREN = r"\("
    t_RPAREN = r"\)"
    t_EQ = "="
    t_NE = "!="
    t_LT = "<"
    t_LE = "<="
    t_GT = ">"
    t_GE = ">="
    t_ADD = r"\+"
    t_SUB = "-"
    t_MUL = r"\*"
    t_DIV = "/"
    t_MOD = "%"
    t_COMMA = ","

    # A string containing ignored characters (spaces and tabs).
    t_ignore = " \t"

    # Define a rule so we can track line numbers.
    def t_newline(self, t):
        r"""\n+"""
        t.lexer.lineno += len(t.value)

    # Quoted string prefixed with 'T'.
    def t_TIME_LITERAL(self, t):
        """T'.*?'"""
        # strip quotes
        t.value = t.value[2:-1]
        return t
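    # For example, the input T'2020-01-01 00:00:00' yields a TIME_LITERAL
    # token whose value is the bare string "2020-01-01 00:00:00".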

    # Quoted string.
    def t_STRING_LITERAL(self, t):
        """'.*?'"""
        # strip quotes
        t.value = t.value[1:-1]
        return t

    # Range literal in the format N..M[:S], spaces allowed; see _RE_RANGE.
    @lex.TOKEN(_RE_RANGE)
    def t_RANGE_LITERAL(self, t):
        match = re.match(_RE_RANGE, t.value)
        start = int(match.group("start"))
        stop = int(match.group("stop"))
        stride = match.group("stride")
        if stride is not None:
            stride = int(stride)
        t.value = (start, stop, stride)
        return t
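    # For example, "1..10:2" becomes the tuple (1, 10, 2), while "1..10"
    # becomes (1, 10, None) because the stride group is optional.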

    # Numbers are used as strings by the parser; do not convert.
    def t_NUMERIC_LITERAL(self, t):
        r"""\d+(\.\d*)?(e[-+]?\d+)?  # 1, 1., 1.1, 1e10, 1.1e-10, etc.
        |
        \.\d+(e[-+]?\d+)?            # .1, .1e10, .1e+10
        """
        return t
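    # For example, the input 1.5e-3 is returned with t.value still equal to
    # the string "1.5e-3"; any numeric conversion is left to the parser.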

    # Qualified identifiers have one or two dots.
    def t_QUALIFIED_IDENTIFIER(self, t):
        r"""[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}"""
        t.type = "QUALIFIED_IDENTIFIER"
        return t
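    # For example, both "visit.id" (one dot) and "instrument.detector.name"
    # (two dots) match this rule; a bare name has no dot and falls through
    # to t_SIMPLE_IDENTIFIER below.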

    # We only support ASCII in identifier names.
    def t_SIMPLE_IDENTIFIER(self, t):
        """[a-zA-Z_][a-zA-Z0-9_]*"""
        # Check for reserved words and make sure they are upper case.
        reserved = self.reserved.get(t.value.upper())
        if reserved is not None:
            t.type = reserved
            t.value = reserved
        else:
            t.type = "SIMPLE_IDENTIFIER"
        return t

    def t_error(self, t):
        """Error handling rule."""
        lexer = t.lexer
        raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)
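

# A minimal, hypothetical driver for this lexer (a sketch only: the example
# expression is made up, and the relative ``.ply`` import means this module
# must be run from within the package, e.g. via ``python -m``):
if __name__ == "__main__":
    lexer = ParserLex.make_lexer()
    lexer.input("visit > 100 AND detector IN (1..4)")
    try:
        # PLY lexers are iterable; each item is a LexToken with .type/.value.
        for tok in lexer:
            print(tok.type, tok.value)
    except ParserLexError as exc:
        print(f"lex error at line {exc.lineno}, pos {exc.pos}: {exc.remain!r}")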