Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/ply/yacc.py: 5%

1929 statements  


1# ----------------------------------------------------------------------------- 

2# ply: yacc.py 

3# 

4# Copyright (C) 2001-2018 

5# David M. Beazley (Dabeaz LLC) 

6# All rights reserved. 

7# 

8# Redistribution and use in source and binary forms, with or without 

9# modification, are permitted provided that the following conditions are 

10# met: 

11# 

12# * Redistributions of source code must retain the above copyright notice, 

13# this list of conditions and the following disclaimer. 

14# * Redistributions in binary form must reproduce the above copyright notice, 

15# this list of conditions and the following disclaimer in the documentation 

16# and/or other materials provided with the distribution. 

17# * Neither the name of the David Beazley or Dabeaz LLC may be used to 

18# endorse or promote products derived from this software without 

19# specific prior written permission. 

20# 

21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

32# ----------------------------------------------------------------------------- 

33# 

34# This implements an LR parser that is constructed from grammar rules defined 

35# as Python functions. The grammar is specified by supplying the BNF inside 

36# Python documentation strings. The inspiration for this technique was borrowed 

37# from John Aycock's Spark parsing system. PLY might be viewed as a cross between 

38# Spark and the GNU bison utility. 

39# 

40# The current implementation is only somewhat object-oriented. The 

41# LR parser itself is defined in terms of an object (which allows multiple 

42# parsers to co-exist). However, most of the variables used during table 

43# construction are defined in terms of global variables. Users shouldn't 

44# notice unless they are trying to define multiple parsers at the same 

45# time using threads (in which case they should have their head examined). 

46# 

47# This implementation supports both SLR and LALR(1) parsing. LALR(1) 

48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 

49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 

50# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced 

51# by the more efficient DeRemer and Pennello algorithm. 

52# 

53# :::::::: WARNING ::::::: 

54# 

55# Construction of LR parsing tables is fairly complicated and expensive. 

56# To make this module run fast, a *LOT* of work has been put into 

57# optimization---often at the expense of readability and what some might 

58# consider to be good Python "coding style." Modify the code at your 

59# own risk! 

60# ---------------------------------------------------------------------------- 
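
# Illustrative sketch (editorial addition, not part of PLY itself): the usage
# pattern described above, as it would appear in a hypothetical user module.
# Token names, rule names, and the input string are examples only; note how
# the BNF lives in the docstrings of the p_* functions.
#
#     import ply.lex as lex
#     import ply.yacc as yacc
#
#     tokens = ("NUMBER", "PLUS")
#
#     t_PLUS = r"\+"
#     t_ignore = " "
#
#     def t_NUMBER(t):
#         r"\d+"
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         t.lexer.skip(1)
#
#     def p_expr_plus(p):
#         "expr : expr PLUS NUMBER"
#         p[0] = p[1] + p[3]
#
#     def p_expr_number(p):
#         "expr : NUMBER"
#         p[0] = p[1]
#
#     def p_error(p):
#         print("Syntax error at", p)
#
#     lexer = lex.lex()
#     parser = yacc.yacc(write_tables=False, debug=False)
#     print(parser.parse("1 + 2 + 3"))    # prints 6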

61 

62import inspect 

63import os.path 

64import re 

65import sys 

66import types 

67import warnings 

68 

69__version__ = "3.11" 

70__tabversion__ = "3.10" 

71 

72# ----------------------------------------------------------------------------- 

73# === User configurable parameters === 

74# 

75# Change these to modify the default behavior of yacc (if you wish) 

76# ----------------------------------------------------------------------------- 

77 

78yaccdebug = True # Debugging mode. If set, yacc generates a 

79# 'parser.out' file in the current directory 

80 

81debug_file = "parser.out" # Default name of the debugging file 

82tab_module = "parsetab" # Default name of the table module 

83default_lr = "LALR" # Default LR table generation method 

84 

85error_count = 3 # Number of symbols that must be shifted to leave recovery mode 

86 

87yaccdevel = False # Set to True if developing yacc. This turns off optimized 

88# implementations of certain functions. 

89 

90resultlimit = 40 # Size limit of results when running in debug mode. 

91 

92pickle_protocol = 0 # Protocol to use when writing pickle files 

93 

94# String type-checking compatibility 

95if sys.version_info[0] < 3: 

96 string_types = basestring 

97else: 

98 string_types = str 

99 

100MAXINT = sys.maxsize 

101 

102# This object is a stand-in for a logging object created by the 

103# logging module. PLY will use this by default to create things 

104# such as the parser.out file. If a user wants more detailed 

105# information, they can create their own logging object and pass 

106# it into PLY. 

107 

108 

109class PlyLogger(object): 

110 def __init__(self, f): 

111 self.f = f 

112 

113 def debug(self, msg, *args, **kwargs): 

114 self.f.write((msg % args) + "\n") 

115 

116 info = debug 

117 

118 def warning(self, msg, *args, **kwargs): 

119 self.f.write("WARNING: " + (msg % args) + "\n") 

120 

121 def error(self, msg, *args, **kwargs): 

122 self.f.write("ERROR: " + (msg % args) + "\n") 

123 

124 critical = debug 

125 
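# Illustrative sketch (editorial addition): any object with debug/info/
# warning/error/critical methods works here, so a standard logging.Logger can
# be passed to the top-level yacc() entry point (defined later in the full
# module) through its errorlog/debuglog keyword arguments:
#
#     import logging
#     logging.basicConfig(filename="parser.log", level=logging.DEBUG)
#     log = logging.getLogger("ply")
#     parser = yacc.yacc(debug=True, debuglog=log, errorlog=log)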

126 

127# Null logger is used when no output is generated. Does nothing. 

128class NullLogger(object): 

129 def __getattribute__(self, name): 

130 return self 

131 

132 def __call__(self, *args, **kwargs): 

133 return self 

134 
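# (Attribute access on a NullLogger returns the instance itself, and calling
# it also returns the instance, so any use such as log.debug("msg") -- or any
# chain of lookups and calls -- silently does nothing.)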

135 

136# Exception raised for yacc-related errors 

137class YaccError(Exception): 

138 pass 

139 

140 

141# Format the result message that the parser produces when running in debug mode. 

142def format_result(r): 

143 repr_str = repr(r) 

144 if "\n" in repr_str: 

145 repr_str = repr(repr_str) 

146 if len(repr_str) > resultlimit: 

147 repr_str = repr_str[:resultlimit] + " ..." 

148 result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str) 

149 return result 

150 

151 

152# Format stack entries when the parser is running in debug mode 

153def format_stack_entry(r): 

154 repr_str = repr(r) 

155 if "\n" in repr_str: 

156 repr_str = repr(repr_str) 

157 if len(repr_str) < 16: 

158 return repr_str 

159 else: 

160 return "<%s @ 0x%x>" % (type(r).__name__, id(r)) 

161 

162 

163# Panic mode error recovery support. This feature is being reworked--much of the 

164# code here is to offer a deprecation/backwards compatible transition 

165 

166_errok = None 

167_token = None 

168_restart = None 

169_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error(). 

170Instead, invoke the methods on the associated parser instance: 

171 

172 def p_error(p): 

173 ... 

174 # Use parser.errok(), parser.token(), parser.restart() 

175 ... 

176 

177 parser = yacc.yacc() 

178""" 

179 

180 

181def errok(): 

182 warnings.warn(_warnmsg) 

183 return _errok() 

184 

185 

186def restart(): 

187 warnings.warn(_warnmsg) 

188 return _restart() 

189 

190 

191def token(): 

192 warnings.warn(_warnmsg) 

193 return _token() 

194 

195 

196# Utility function to call the p_error() function with some deprecation hacks 

197def call_errorfunc(errorfunc, token, parser): 

198 global _errok, _token, _restart 

199 _errok = parser.errok 

200 _token = parser.token 

201 _restart = parser.restart 

202 r = errorfunc(token) 

203 try: 

204 del _errok, _token, _restart 

205 except NameError: 

206 pass 

207 return r 

208 
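# Illustrative sketch (editorial addition): the instance-method style that the
# warning above recommends. The token name 'SEMI' and the resynchronization
# strategy are hypothetical examples.
#
#     def p_error(p):
#         if p is None:
#             print("Unexpected end of input")
#             return
#         # Panic mode: discard tokens up to a statement separator, then
#         # tell the parser that recovery succeeded.
#         while True:
#             tok = parser.token()    # parser is the object from yacc.yacc()
#             if not tok or tok.type == "SEMI":
#                 break
#         parser.errok()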

209 

210# ----------------------------------------------------------------------------- 

211# === LR Parsing Engine === 

212# 

213# The following classes are used for the LR parser itself. These are not 

214# used during table construction and are independent of the actual LR 

215# table generation algorithm 

216# ----------------------------------------------------------------------------- 

217 

218# This class is used to hold non-terminal grammar symbols during parsing. 

219# It normally has the following attributes set: 

220# .type = Grammar symbol type 

221# .value = Symbol value 

222# .lineno = Starting line number 

223# .endlineno = Ending line number (optional, set automatically) 

224# .lexpos = Starting lex position 

225# .endlexpos = Ending lex position (optional, set automatically) 

226 

227 

228class YaccSymbol: 

229 def __str__(self): 

230 return self.type 

231 

232 def __repr__(self): 

233 return str(self) 

234 

235 

236# This class is a wrapper around the objects actually passed to each 

237# grammar rule. Index lookup and assignment actually assign the 

238# .value attribute of the underlying YaccSymbol object. 

239# The lineno() method returns the line number of a given 

240# item (or 0 if not defined). The linespan() method returns 

241# a tuple of (startline,endline) representing the range of lines 

242# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 

243# representing the range of positional information for a symbol. 

244 

245 

246class YaccProduction: 

247 def __init__(self, s, stack=None): 

248 self.slice = s 

249 self.stack = stack 

250 self.lexer = None 

251 self.parser = None 

252 

253 def __getitem__(self, n): 

254 if isinstance(n, slice): 

255 return [s.value for s in self.slice[n]] 

256 elif n >= 0: 

257 return self.slice[n].value 

258 else: 

259 return self.stack[n].value 

260 

261 def __setitem__(self, n, v): 

262 self.slice[n].value = v 

263 

264 def __getslice__(self, i, j): 

265 return [s.value for s in self.slice[i:j]] 

266 

267 def __len__(self): 

268 return len(self.slice) 

269 

270 def lineno(self, n): 

271 return getattr(self.slice[n], "lineno", 0) 

272 

273 def set_lineno(self, n, lineno): 

274 self.slice[n].lineno = lineno 

275 

276 def linespan(self, n): 

277 startline = getattr(self.slice[n], "lineno", 0) 

278 endline = getattr(self.slice[n], "endlineno", startline) 

279 return startline, endline 

280 

281 def lexpos(self, n): 

282 return getattr(self.slice[n], "lexpos", 0) 

283 

284 def set_lexpos(self, n, lexpos): 

285 self.slice[n].lexpos = lexpos 

286 

287 def lexspan(self, n): 

288 startpos = getattr(self.slice[n], "lexpos", 0) 

289 endpos = getattr(self.slice[n], "endlexpos", startpos) 

290 return startpos, endpos 

291 

292 def error(self): 

293 raise SyntaxError 

294 
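
# Illustrative sketch (editorial addition): what a grammar rule sees through
# this wrapper. Indexing reads and writes the .value of the underlying
# YaccSymbol objects; the helper methods expose position information. Rule
# and token names are hypothetical examples.
#
#     def p_expression_plus(p):
#         "expression : expression PLUS term"
#         p[0] = p[1] + p[3]        # set the value of the result symbol
#         print(p.lineno(2))        # line where PLUS was seen
#         print(p.linespan(3))      # (startline, endline) of term
#         print(p.lexspan(3))       # (lexpos, endlexpos) of term
#
# (The end positions are only filled in when parsing with tracking=True.)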

295 

296# ----------------------------------------------------------------------------- 

297# == LRParser == 

298# 

299# The LR Parsing engine. 

300# ----------------------------------------------------------------------------- 

301 

302 

303class LRParser: 

304 def __init__(self, lrtab, errorf): 

305 self.productions = lrtab.lr_productions 

306 self.action = lrtab.lr_action 

307 self.goto = lrtab.lr_goto 

308 self.errorfunc = errorf 

309 self.set_defaulted_states() 

310 self.errorok = True 

311 

312 def errok(self): 

313 self.errorok = True 

314 

315 def restart(self): 

316 del self.statestack[:] 

317 del self.symstack[:] 

318 sym = YaccSymbol() 

319 sym.type = "$end" 

320 self.symstack.append(sym) 

321 self.statestack.append(0) 

322 

323 # Defaulted state support. 

324 # This method identifies parser states where there is only one possible reduction action. 

325 # For such states, the parser can choose to make a rule reduction without consuming 

326 # the next look-ahead token. This delayed invocation of the tokenizer can be useful in 

327 # certain kinds of advanced parsing situations where the lexer and parser interact with 

328 # each other or change states (i.e., manipulation of scope, lexer states, etc.). 

329 # 

330 # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions 

331 def set_defaulted_states(self): 

332 self.defaulted_states = {} 

333 for state, actions in self.action.items(): 

334 rules = list(actions.values()) 

335 if len(rules) == 1 and rules[0] < 0: 

336 self.defaulted_states[state] = rules[0] 

337 

338 def disable_defaulted_states(self): 

339 self.defaulted_states = {} 

340 

341 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

342 if debug or yaccdevel: 

343 if isinstance(debug, int): 

344 debug = PlyLogger(sys.stderr) 

345 return self.parsedebug(input, lexer, debug, tracking, tokenfunc) 

346 elif tracking: 

347 return self.parseopt(input, lexer, debug, tracking, tokenfunc) 

348 else: 

349 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) 

350 

351 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

352 # parsedebug(). 

353 # 

354 # This is the debugging enabled version of parse(). All changes made to the 

355 # parsing engine should be made here. Optimized versions of this function 

356 # are automatically created by the ply/ygen.py script. This script cuts out 

357 # sections enclosed in markers such as this: 

358 # 

359 # #--! DEBUG 

360 # statements 

361 # #--! DEBUG 

362 # 

363 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

364 

365 def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

366 # --! parsedebug-start 

367 lookahead = None # Current lookahead symbol 

368 lookaheadstack = [] # Stack of lookahead symbols 

369 actions = self.action # Local reference to action table (to avoid lookup on self.) 

370 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

371 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

372 defaulted_states = self.defaulted_states # Local reference to defaulted states 

373 pslice = YaccProduction(None) # Production object passed to grammar rules 

374 errorcount = 0 # Used during error recovery 

375 

376 # --! DEBUG 

377 debug.info("PLY: PARSE DEBUG START") 

378 # --! DEBUG 

379 

380 # If no lexer was given, we will try to use the lex module 

381 if not lexer: 

382 from . import lex 

383 

384 lexer = lex.lexer 

385 

386 # Set up the lexer and parser objects on pslice 

387 pslice.lexer = lexer 

388 pslice.parser = self 

389 

390 # If input was supplied, pass to lexer 

391 if input is not None: 

392 lexer.input(input) 

393 

394 if tokenfunc is None: 

395 # Tokenize function 

396 get_token = lexer.token 

397 else: 

398 get_token = tokenfunc 

399 

400 # Set the parser() token method (sometimes used in error recovery) 

401 self.token = get_token 

402 

403 # Set up the state and symbol stacks 

404 

405 statestack = [] # Stack of parsing states 

406 self.statestack = statestack 

407 symstack = [] # Stack of grammar symbols 

408 self.symstack = symstack 

409 

410 pslice.stack = symstack # Put in the production 

411 errtoken = None # Err token 

412 

413 # The start state is assumed to be (0,$end) 

414 

415 statestack.append(0) 

416 sym = YaccSymbol() 

417 sym.type = "$end" 

418 symstack.append(sym) 

419 state = 0 

420 while True: 

421 # Get the next symbol on the input. If a lookahead symbol 

422 # is already set, we just use that. Otherwise, we'll pull 

423 # the next token off of the lookaheadstack or from the lexer 

424 

425 # --! DEBUG 

426 debug.debug("") 

427 debug.debug("State : %s", state) 

428 # --! DEBUG 

429 

430 if state not in defaulted_states: 

431 if not lookahead: 

432 if not lookaheadstack: 

433 lookahead = get_token() # Get the next token 

434 else: 

435 lookahead = lookaheadstack.pop() 

436 if not lookahead: 

437 lookahead = YaccSymbol() 

438 lookahead.type = "$end" 

439 

440 # Check the action table 

441 ltype = lookahead.type 

442 t = actions[state].get(ltype) 

443 else: 

444 t = defaulted_states[state] 

445 # --! DEBUG 

446 debug.debug("Defaulted state %s: Reduce using %d", state, -t) 

447 # --! DEBUG 

448 

449 # --! DEBUG 

450 debug.debug( 

451 "Stack : %s", 

452 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(), 

453 ) 

454 # --! DEBUG 

455 

456 if t is not None: 

457 if t > 0: 

458 # shift a symbol on the stack 

459 statestack.append(t) 

460 state = t 

461 

462 # --! DEBUG 

463 debug.debug("Action : Shift and goto state %s", t) 

464 # --! DEBUG 

465 

466 symstack.append(lookahead) 

467 lookahead = None 

468 

469 # Decrease error count on successful shift 

470 if errorcount: 

471 errorcount -= 1 

472 continue 

473 

474 if t < 0: 

475 # reduce a symbol on the stack, emit a production 

476 p = prod[-t] 

477 pname = p.name 

478 plen = p.len 

479 

480 # Get production function 

481 sym = YaccSymbol() 

482 sym.type = pname # Production name 

483 sym.value = None 

484 

485 # --! DEBUG 

486 if plen: 

487 debug.info( 

488 "Action : Reduce rule [%s] with %s and goto state %d", 

489 p.str, 

490 "[" + ",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + "]", 

491 goto[statestack[-1 - plen]][pname], 

492 ) 

493 else: 

494 debug.info( 

495 "Action : Reduce rule [%s] with %s and goto state %d", 

496 p.str, 

497 [], 

498 goto[statestack[-1]][pname], 

499 ) 

500 

501 # --! DEBUG 

502 

503 if plen: 

504 targ = symstack[-plen - 1 :] 

505 targ[0] = sym 

506 

507 # --! TRACKING 

508 if tracking: 

509 t1 = targ[1] 

510 sym.lineno = t1.lineno 

511 sym.lexpos = t1.lexpos 

512 t1 = targ[-1] 

513 sym.endlineno = getattr(t1, "endlineno", t1.lineno) 

514 sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos) 

515 # --! TRACKING 

516 

517 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

518 # The code enclosed in this section is duplicated 

519 # below as a performance optimization. Make sure 

520 # changes get made in both locations. 

521 

522 pslice.slice = targ 

523 

524 try: 

525 # Call the grammar rule with our special slice object 

526 del symstack[-plen:] 

527 self.state = state 

528 p.callable(pslice) 

529 del statestack[-plen:] 

530 # --! DEBUG 

531 debug.info("Result : %s", format_result(pslice[0])) 

532 # --! DEBUG 

533 symstack.append(sym) 

534 state = goto[statestack[-1]][pname] 

535 statestack.append(state) 

536 except SyntaxError: 

537 # If an error was set, enter error recovery state 

538 lookaheadstack.append(lookahead) # Save the current lookahead token 

539 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

540 statestack.pop() # Pop back one state (before the reduce) 

541 state = statestack[-1] 

542 sym.type = "error" 

543 sym.value = "error" 

544 lookahead = sym 

545 errorcount = error_count 

546 self.errorok = False 

547 

548 continue 

549 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

550 

551 else: 

552 # --! TRACKING 

553 if tracking: 

554 sym.lineno = lexer.lineno 

555 sym.lexpos = lexer.lexpos 

556 # --! TRACKING 

557 

558 targ = [sym] 

559 

560 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

561 # The code enclosed in this section is duplicated 

562 # above as a performance optimization. Make sure 

563 # changes get made in both locations. 

564 

565 pslice.slice = targ 

566 

567 try: 

568 # Call the grammar rule with our special slice object 

569 self.state = state 

570 p.callable(pslice) 

571 # --! DEBUG 

572 debug.info("Result : %s", format_result(pslice[0])) 

573 # --! DEBUG 

574 symstack.append(sym) 

575 state = goto[statestack[-1]][pname] 

576 statestack.append(state) 

577 except SyntaxError: 

578 # If an error was set, enter error recovery state 

579 lookaheadstack.append(lookahead) # Save the current lookahead token 

580 statestack.pop() # Pop back one state (before the reduce) 

581 state = statestack[-1] 

582 sym.type = "error" 

583 sym.value = "error" 

584 lookahead = sym 

585 errorcount = error_count 

586 self.errorok = False 

587 

588 continue 

589 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

590 

591 if t == 0: 

592 n = symstack[-1] 

593 result = getattr(n, "value", None) 

594 # --! DEBUG 

595 debug.info("Done : Returning %s", format_result(result)) 

596 debug.info("PLY: PARSE DEBUG END") 

597 # --! DEBUG 

598 return result 

599 

600 if t is None: 

601 # --! DEBUG 

602 debug.error( 

603 "Error : %s", 

604 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(), 

605 ) 

606 # --! DEBUG 

607 

608 # We have some kind of parsing error here. To handle 

609 # this, we are going to push the current token onto 

610 # the tokenstack and replace it with an 'error' token. 

611 # If there are any synchronization rules, they may 

612 # catch it. 

613 # 

614# In addition to pushing the error token, we call 

615# the user-defined p_error() function if this is the 

616 # first syntax error. This function is only called if 

617 # errorcount == 0. 

618 if errorcount == 0 or self.errorok: 

619 errorcount = error_count 

620 self.errorok = False 

621 errtoken = lookahead 

622 if errtoken.type == "$end": 

623 errtoken = None # End of file! 

624 if self.errorfunc: 

625 if errtoken and not hasattr(errtoken, "lexer"): 

626 errtoken.lexer = lexer 

627 self.state = state 

628 tok = call_errorfunc(self.errorfunc, errtoken, self) 

629 if self.errorok: 

630 # User must have done some kind of panic 

631 # mode recovery on their own. The 

632 # returned token is the next lookahead 

633 lookahead = tok 

634 errtoken = None 

635 continue 

636 else: 

637 if errtoken: 

638 if hasattr(errtoken, "lineno"): 

639 lineno = lookahead.lineno 

640 else: 

641 lineno = 0 

642 if lineno: 

643 sys.stderr.write( 

644 "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type) 

645 ) 

646 else: 

647 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type) 

648 else: 

649 sys.stderr.write("yacc: Parse error in input. EOF\n") 

650 return 

651 

652 else: 

653 errorcount = error_count 

654 

655 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

656 # entire parse has been rolled back and we're completely hosed. The token is 

657 # discarded and we just keep going. 

658 

659 if len(statestack) <= 1 and lookahead.type != "$end": 

660 lookahead = None 

661 errtoken = None 

662 state = 0 

663 # Nuke the pushback stack 

664 del lookaheadstack[:] 

665 continue 

666 

667 # case 2: the statestack has a couple of entries on it, but we're 

668 # at the end of the file. nuke the top entry and generate an error token 

669 

670 # Start nuking entries on the stack 

671 if lookahead.type == "$end": 

672 # Whoa. We're really hosed here. Bail out 

673 return 

674 

675 if lookahead.type != "error": 

676 sym = symstack[-1] 

677 if sym.type == "error": 

678 # Hmmm. Error is on top of stack, we'll just nuke input 

679 # symbol and continue 

680 # --! TRACKING 

681 if tracking: 

682 sym.endlineno = getattr(lookahead, "lineno", sym.lineno) 

683 sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos) 

684 # --! TRACKING 

685 lookahead = None 

686 continue 

687 

688 # Create the error symbol for the first time and make it the new lookahead symbol 

689 t = YaccSymbol() 

690 t.type = "error" 

691 

692 if hasattr(lookahead, "lineno"): 

693 t.lineno = t.endlineno = lookahead.lineno 

694 if hasattr(lookahead, "lexpos"): 

695 t.lexpos = t.endlexpos = lookahead.lexpos 

696 t.value = lookahead 

697 lookaheadstack.append(lookahead) 

698 lookahead = t 

699 else: 

700 sym = symstack.pop() 

701 # --! TRACKING 

702 if tracking: 

703 lookahead.lineno = sym.lineno 

704 lookahead.lexpos = sym.lexpos 

705 # --! TRACKING 

706 statestack.pop() 

707 state = statestack[-1] 

708 

709 continue 

710 

711 # Call an error function here 

712 raise RuntimeError("yacc: internal parser error!!!\n") 

713 

714 # --! parsedebug-end 

715 

716 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

717 # parseopt(). 

718 # 

719 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! 

720 # This code is automatically generated by the ply/ygen.py script. Make 

721 # changes to the parsedebug() method instead. 

722 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

723 

724 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

725 # --! parseopt-start 

726 lookahead = None # Current lookahead symbol 

727 lookaheadstack = [] # Stack of lookahead symbols 

728 actions = self.action # Local reference to action table (to avoid lookup on self.) 

729 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

730 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

731 defaulted_states = self.defaulted_states # Local reference to defaulted states 

732 pslice = YaccProduction(None) # Production object passed to grammar rules 

733 errorcount = 0 # Used during error recovery 

734 

735 # If no lexer was given, we will try to use the lex module 

736 if not lexer: 

737 from . import lex 

738 

739 lexer = lex.lexer 

740 

741 # Set up the lexer and parser objects on pslice 

742 pslice.lexer = lexer 

743 pslice.parser = self 

744 

745 # If input was supplied, pass to lexer 

746 if input is not None: 

747 lexer.input(input) 

748 

749 if tokenfunc is None: 

750 # Tokenize function 

751 get_token = lexer.token 

752 else: 

753 get_token = tokenfunc 

754 

755 # Set the parser() token method (sometimes used in error recovery) 

756 self.token = get_token 

757 

758 # Set up the state and symbol stacks 

759 

760 statestack = [] # Stack of parsing states 

761 self.statestack = statestack 

762 symstack = [] # Stack of grammar symbols 

763 self.symstack = symstack 

764 

765 pslice.stack = symstack # Put in the production 

766 errtoken = None # Err token 

767 

768 # The start state is assumed to be (0,$end) 

769 

770 statestack.append(0) 

771 sym = YaccSymbol() 

772 sym.type = "$end" 

773 symstack.append(sym) 

774 state = 0 

775 while True: 

776 # Get the next symbol on the input. If a lookahead symbol 

777 # is already set, we just use that. Otherwise, we'll pull 

778 # the next token off of the lookaheadstack or from the lexer 

779 

780 if state not in defaulted_states: 

781 if not lookahead: 

782 if not lookaheadstack: 

783 lookahead = get_token() # Get the next token 

784 else: 

785 lookahead = lookaheadstack.pop() 

786 if not lookahead: 

787 lookahead = YaccSymbol() 

788 lookahead.type = "$end" 

789 

790 # Check the action table 

791 ltype = lookahead.type 

792 t = actions[state].get(ltype) 

793 else: 

794 t = defaulted_states[state] 

795 

796 if t is not None: 

797 if t > 0: 

798 # shift a symbol on the stack 

799 statestack.append(t) 

800 state = t 

801 

802 symstack.append(lookahead) 

803 lookahead = None 

804 

805 # Decrease error count on successful shift 

806 if errorcount: 

807 errorcount -= 1 

808 continue 

809 

810 if t < 0: 

811 # reduce a symbol on the stack, emit a production 

812 p = prod[-t] 

813 pname = p.name 

814 plen = p.len 

815 

816 # Get production function 

817 sym = YaccSymbol() 

818 sym.type = pname # Production name 

819 sym.value = None 

820 

821 if plen: 

822 targ = symstack[-plen - 1 :] 

823 targ[0] = sym 

824 

825 # --! TRACKING 

826 if tracking: 

827 t1 = targ[1] 

828 sym.lineno = t1.lineno 

829 sym.lexpos = t1.lexpos 

830 t1 = targ[-1] 

831 sym.endlineno = getattr(t1, "endlineno", t1.lineno) 

832 sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos) 

833 # --! TRACKING 

834 

835 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

836 # The code enclosed in this section is duplicated 

837 # below as a performance optimization. Make sure 

838 # changes get made in both locations. 

839 

840 pslice.slice = targ 

841 

842 try: 

843 # Call the grammar rule with our special slice object 

844 del symstack[-plen:] 

845 self.state = state 

846 p.callable(pslice) 

847 del statestack[-plen:] 

848 symstack.append(sym) 

849 state = goto[statestack[-1]][pname] 

850 statestack.append(state) 

851 except SyntaxError: 

852 # If an error was set, enter error recovery state 

853 lookaheadstack.append(lookahead) # Save the current lookahead token 

854 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

855 statestack.pop() # Pop back one state (before the reduce) 

856 state = statestack[-1] 

857 sym.type = "error" 

858 sym.value = "error" 

859 lookahead = sym 

860 errorcount = error_count 

861 self.errorok = False 

862 

863 continue 

864 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

865 

866 else: 

867 # --! TRACKING 

868 if tracking: 

869 sym.lineno = lexer.lineno 

870 sym.lexpos = lexer.lexpos 

871 # --! TRACKING 

872 

873 targ = [sym] 

874 

875 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

876 # The code enclosed in this section is duplicated 

877 # above as a performance optimization. Make sure 

878 # changes get made in both locations. 

879 

880 pslice.slice = targ 

881 

882 try: 

883 # Call the grammar rule with our special slice object 

884 self.state = state 

885 p.callable(pslice) 

886 symstack.append(sym) 

887 state = goto[statestack[-1]][pname] 

888 statestack.append(state) 

889 except SyntaxError: 

890 # If an error was set, enter error recovery state 

891 lookaheadstack.append(lookahead) # Save the current lookahead token 

892 statestack.pop() # Pop back one state (before the reduce) 

893 state = statestack[-1] 

894 sym.type = "error" 

895 sym.value = "error" 

896 lookahead = sym 

897 errorcount = error_count 

898 self.errorok = False 

899 

900 continue 

901 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

902 

903 if t == 0: 

904 n = symstack[-1] 

905 result = getattr(n, "value", None) 

906 return result 

907 

908 if t is None: 

909 # We have some kind of parsing error here. To handle 

910 # this, we are going to push the current token onto 

911 # the tokenstack and replace it with an 'error' token. 

912 # If there are any synchronization rules, they may 

913 # catch it. 

914 # 

915# In addition to pushing the error token, we call 

916# the user-defined p_error() function if this is the 

917 # first syntax error. This function is only called if 

918 # errorcount == 0. 

919 if errorcount == 0 or self.errorok: 

920 errorcount = error_count 

921 self.errorok = False 

922 errtoken = lookahead 

923 if errtoken.type == "$end": 

924 errtoken = None # End of file! 

925 if self.errorfunc: 

926 if errtoken and not hasattr(errtoken, "lexer"): 

927 errtoken.lexer = lexer 

928 self.state = state 

929 tok = call_errorfunc(self.errorfunc, errtoken, self) 

930 if self.errorok: 

931 # User must have done some kind of panic 

932 # mode recovery on their own. The 

933 # returned token is the next lookahead 

934 lookahead = tok 

935 errtoken = None 

936 continue 

937 else: 

938 if errtoken: 

939 if hasattr(errtoken, "lineno"): 

940 lineno = lookahead.lineno 

941 else: 

942 lineno = 0 

943 if lineno: 

944 sys.stderr.write( 

945 "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type) 

946 ) 

947 else: 

948 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type) 

949 else: 

950 sys.stderr.write("yacc: Parse error in input. EOF\n") 

951 return 

952 

953 else: 

954 errorcount = error_count 

955 

956 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

957 # entire parse has been rolled back and we're completely hosed. The token is 

958 # discarded and we just keep going. 

959 

960 if len(statestack) <= 1 and lookahead.type != "$end": 

961 lookahead = None 

962 errtoken = None 

963 state = 0 

964 # Nuke the pushback stack 

965 del lookaheadstack[:] 

966 continue 

967 

968 # case 2: the statestack has a couple of entries on it, but we're 

969 # at the end of the file. nuke the top entry and generate an error token 

970 

971 # Start nuking entries on the stack 

972 if lookahead.type == "$end": 

973 # Whoa. We're really hosed here. Bail out 

974 return 

975 

976 if lookahead.type != "error": 

977 sym = symstack[-1] 

978 if sym.type == "error": 

979 # Hmmm. Error is on top of stack, we'll just nuke input 

980 # symbol and continue 

981 # --! TRACKING 

982 if tracking: 

983 sym.endlineno = getattr(lookahead, "lineno", sym.lineno) 

984 sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos) 

985 # --! TRACKING 

986 lookahead = None 

987 continue 

988 

989 # Create the error symbol for the first time and make it the new lookahead symbol 

990 t = YaccSymbol() 

991 t.type = "error" 

992 

993 if hasattr(lookahead, "lineno"): 

994 t.lineno = t.endlineno = lookahead.lineno 

995 if hasattr(lookahead, "lexpos"): 

996 t.lexpos = t.endlexpos = lookahead.lexpos 

997 t.value = lookahead 

998 lookaheadstack.append(lookahead) 

999 lookahead = t 

1000 else: 

1001 sym = symstack.pop() 

1002 # --! TRACKING 

1003 if tracking: 

1004 lookahead.lineno = sym.lineno 

1005 lookahead.lexpos = sym.lexpos 

1006 # --! TRACKING 

1007 statestack.pop() 

1008 state = statestack[-1] 

1009 

1010 continue 

1011 

1012 # Call an error function here 

1013 raise RuntimeError("yacc: internal parser error!!!\n") 

1014 

1015 # --! parseopt-end 

1016 

1017 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1018 # parseopt_notrack(). 

1019 # 

1020 # Optimized version of parseopt() with line number tracking removed. 

1021 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated 

1022 # by the ply/ygen.py script. Make changes to the parsedebug() method instead. 

1023 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1024 

1025 def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

1026 # --! parseopt-notrack-start 

1027 lookahead = None # Current lookahead symbol 

1028 lookaheadstack = [] # Stack of lookahead symbols 

1029 actions = self.action # Local reference to action table (to avoid lookup on self.) 

1030 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

1031 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

1032 defaulted_states = self.defaulted_states # Local reference to defaulted states 

1033 pslice = YaccProduction(None) # Production object passed to grammar rules 

1034 errorcount = 0 # Used during error recovery 

1035 

1036 # If no lexer was given, we will try to use the lex module 

1037 if not lexer: 

1038 from . import lex 

1039 

1040 lexer = lex.lexer 

1041 

1042 # Set up the lexer and parser objects on pslice 

1043 pslice.lexer = lexer 

1044 pslice.parser = self 

1045 

1046 # If input was supplied, pass to lexer 

1047 if input is not None: 

1048 lexer.input(input) 

1049 

1050 if tokenfunc is None: 

1051 # Tokenize function 

1052 get_token = lexer.token 

1053 else: 

1054 get_token = tokenfunc 

1055 

1056 # Set the parser() token method (sometimes used in error recovery) 

1057 self.token = get_token 

1058 

1059 # Set up the state and symbol stacks 

1060 

1061 statestack = [] # Stack of parsing states 

1062 self.statestack = statestack 

1063 symstack = [] # Stack of grammar symbols 

1064 self.symstack = symstack 

1065 

1066 pslice.stack = symstack # Put in the production 

1067 errtoken = None # Err token 

1068 

1069 # The start state is assumed to be (0,$end) 

1070 

1071 statestack.append(0) 

1072 sym = YaccSymbol() 

1073 sym.type = "$end" 

1074 symstack.append(sym) 

1075 state = 0 

1076 while True: 

1077 # Get the next symbol on the input. If a lookahead symbol 

1078 # is already set, we just use that. Otherwise, we'll pull 

1079 # the next token off of the lookaheadstack or from the lexer 

1080 

1081 if state not in defaulted_states: 

1082 if not lookahead: 

1083 if not lookaheadstack: 

1084 lookahead = get_token() # Get the next token 

1085 else: 

1086 lookahead = lookaheadstack.pop() 

1087 if not lookahead: 

1088 lookahead = YaccSymbol() 

1089 lookahead.type = "$end" 

1090 

1091 # Check the action table 

1092 ltype = lookahead.type 

1093 t = actions[state].get(ltype) 

1094 else: 

1095 t = defaulted_states[state] 

1096 

1097 if t is not None: 

1098 if t > 0: 

1099 # shift a symbol on the stack 

1100 statestack.append(t) 

1101 state = t 

1102 

1103 symstack.append(lookahead) 

1104 lookahead = None 

1105 

1106 # Decrease error count on successful shift 

1107 if errorcount: 

1108 errorcount -= 1 

1109 continue 

1110 

1111 if t < 0: 

1112 # reduce a symbol on the stack, emit a production 

1113 p = prod[-t] 

1114 pname = p.name 

1115 plen = p.len 

1116 

1117 # Get production function 

1118 sym = YaccSymbol() 

1119 sym.type = pname # Production name 

1120 sym.value = None 

1121 

1122 if plen: 

1123 targ = symstack[-plen - 1 :] 

1124 targ[0] = sym 

1125 

1126 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1127 # The code enclosed in this section is duplicated 

1128 # below as a performance optimization. Make sure 

1129 # changes get made in both locations. 

1130 

1131 pslice.slice = targ 

1132 

1133 try: 

1134 # Call the grammar rule with our special slice object 

1135 del symstack[-plen:] 

1136 self.state = state 

1137 p.callable(pslice) 

1138 del statestack[-plen:] 

1139 symstack.append(sym) 

1140 state = goto[statestack[-1]][pname] 

1141 statestack.append(state) 

1142 except SyntaxError: 

1143 # If an error was set, enter error recovery state 

1144 lookaheadstack.append(lookahead) # Save the current lookahead token 

1145 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

1146 statestack.pop() # Pop back one state (before the reduce) 

1147 state = statestack[-1] 

1148 sym.type = "error" 

1149 sym.value = "error" 

1150 lookahead = sym 

1151 errorcount = error_count 

1152 self.errorok = False 

1153 

1154 continue 

1155 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1156 

1157 else: 

1158 targ = [sym] 

1159 

1160 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1161 # The code enclosed in this section is duplicated 

1162 # above as a performance optimization. Make sure 

1163 # changes get made in both locations. 

1164 

1165 pslice.slice = targ 

1166 

1167 try: 

1168 # Call the grammar rule with our special slice object 

1169 self.state = state 

1170 p.callable(pslice) 

1171 symstack.append(sym) 

1172 state = goto[statestack[-1]][pname] 

1173 statestack.append(state) 

1174 except SyntaxError: 

1175 # If an error was set, enter error recovery state 

1176 lookaheadstack.append(lookahead) # Save the current lookahead token 

1177 statestack.pop() # Pop back one state (before the reduce) 

1178 state = statestack[-1] 

1179 sym.type = "error" 

1180 sym.value = "error" 

1181 lookahead = sym 

1182 errorcount = error_count 

1183 self.errorok = False 

1184 

1185 continue 

1186 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1187 

1188 if t == 0: 

1189 n = symstack[-1] 

1190 result = getattr(n, "value", None) 

1191 return result 

1192 

1193 if t is None: 

1194 # We have some kind of parsing error here. To handle 

1195 # this, we are going to push the current token onto 

1196 # the tokenstack and replace it with an 'error' token. 

1197 # If there are any synchronization rules, they may 

1198 # catch it. 

1199 # 

1200# In addition to pushing the error token, we call 

1201# the user-defined p_error() function if this is the 

1202 # first syntax error. This function is only called if 

1203 # errorcount == 0. 

1204 if errorcount == 0 or self.errorok: 

1205 errorcount = error_count 

1206 self.errorok = False 

1207 errtoken = lookahead 

1208 if errtoken.type == "$end": 

1209 errtoken = None # End of file! 

1210 if self.errorfunc: 

1211 if errtoken and not hasattr(errtoken, "lexer"): 

1212 errtoken.lexer = lexer 

1213 self.state = state 

1214 tok = call_errorfunc(self.errorfunc, errtoken, self) 

1215 if self.errorok: 

1216 # User must have done some kind of panic 

1217 # mode recovery on their own. The 

1218 # returned token is the next lookahead 

1219 lookahead = tok 

1220 errtoken = None 

1221 continue 

1222 else: 

1223 if errtoken: 

1224 if hasattr(errtoken, "lineno"): 

1225 lineno = lookahead.lineno 

1226 else: 

1227 lineno = 0 

1228 if lineno: 

1229 sys.stderr.write( 

1230 "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type) 

1231 ) 

1232 else: 

1233 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type) 

1234 else: 

1235 sys.stderr.write("yacc: Parse error in input. EOF\n") 

1236 return 

1237 

1238 else: 

1239 errorcount = error_count 

1240 

1241 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

1242 # entire parse has been rolled back and we're completely hosed. The token is 

1243 # discarded and we just keep going. 

1244 

1245 if len(statestack) <= 1 and lookahead.type != "$end": 

1246 lookahead = None 

1247 errtoken = None 

1248 state = 0 

1249 # Nuke the pushback stack 

1250 del lookaheadstack[:] 

1251 continue 

1252 

1253 # case 2: the statestack has a couple of entries on it, but we're 

1254 # at the end of the file. nuke the top entry and generate an error token 

1255 

1256 # Start nuking entries on the stack 

1257 if lookahead.type == "$end": 

1258 # Whoa. We're really hosed here. Bail out 

1259 return 

1260 

1261 if lookahead.type != "error": 

1262 sym = symstack[-1] 

1263 if sym.type == "error": 

1264 # Hmmm. Error is on top of stack, we'll just nuke input 

1265 # symbol and continue 

1266 lookahead = None 

1267 continue 

1268 

1269 # Create the error symbol for the first time and make it the new lookahead symbol 

1270 t = YaccSymbol() 

1271 t.type = "error" 

1272 

1273 if hasattr(lookahead, "lineno"): 

1274 t.lineno = t.endlineno = lookahead.lineno 

1275 if hasattr(lookahead, "lexpos"): 

1276 t.lexpos = t.endlexpos = lookahead.lexpos 

1277 t.value = lookahead 

1278 lookaheadstack.append(lookahead) 

1279 lookahead = t 

1280 else: 

1281 sym = symstack.pop() 

1282 statestack.pop() 

1283 state = statestack[-1] 

1284 

1285 continue 

1286 

1287 # Call an error function here 

1288 raise RuntimeError("yacc: internal parser error!!!\n") 

1289 

1290 # --! parseopt-notrack-end 

1291 

1292 
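# Illustrative sketch (editorial addition): how the flags of parse() select an
# engine variant, and how defaulted states can be disabled when grammar
# actions feed information back into the lexer. The input string is a
# hypothetical example.
#
#     result = parser.parse("1 + 2 + 3")                  # parseopt_notrack()
#     result = parser.parse("1 + 2 + 3", tracking=True)   # parseopt()
#     result = parser.parse("1 + 2 + 3", debug=True)      # parsedebug(),
#                                                         # logging to stderr
#     parser.disable_defaulted_states()  # always consult the lookahead token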

1293# ----------------------------------------------------------------------------- 

1294# === Grammar Representation === 

1295# 

1296# The following functions, classes, and variables are used to represent and 

1297# manipulate the rules that make up a grammar. 

1298# ----------------------------------------------------------------------------- 

1299 

1300# regex matching identifiers 

1301_is_identifier = re.compile(r"^[a-zA-Z0-9_-]+$") 

1302 

1303# ----------------------------------------------------------------------------- 

1304# class Production: 

1305# 

1306# This class stores the raw information about a single production or grammar rule. 

1307# A grammar rule refers to a specification such as this: 

1308# 

1309# expr : expr PLUS term 

1310# 

1311# Here are the basic attributes defined on all productions 

1312# 

1313# name - Name of the production. For example 'expr' 

1314# prod - A list of symbols on the right side ['expr','PLUS','term'] 

1315# prec - Production precedence level 

1316# number - Production number. 

1317# func - Function that executes on reduce 

1318# file - File where production function is defined 

1319# lineno - Line number where production function is defined 

1320# 

1321# The following additional attributes are also defined: 

1322# 

1323# len - Length of the production (number of symbols on right hand side) 

1324# usyms - List of unique symbols found in the production 

1325# ----------------------------------------------------------------------------- 

1326 

1327 

1328class Production(object): 

1329 reduced = 0 

1330 

1331 def __init__(self, number, name, prod, precedence=("right", 0), func=None, file="", line=0): 

1332 self.name = name 

1333 self.prod = tuple(prod) 

1334 self.number = number 

1335 self.func = func 

1336 self.callable = None 

1337 self.file = file 

1338 self.line = line 

1339 self.prec = precedence 

1340 

1341 # Internal settings used during table construction 

1342 

1343 self.len = len(self.prod) # Length of the production 

1344 

1345 # Create a list of unique production symbols used in the production 

1346 self.usyms = [] 

1347 for s in self.prod: 

1348 if s not in self.usyms: 

1349 self.usyms.append(s) 

1350 

1351 # List of all LR items for the production 

1352 self.lr_items = [] 

1353 self.lr_next = None 

1354 

1355 # Create a string representation 

1356 if self.prod: 

1357 self.str = "%s -> %s" % (self.name, " ".join(self.prod)) 

1358 else: 

1359 self.str = "%s -> <empty>" % self.name 

1360 

1361 def __str__(self): 

1362 return self.str 

1363 

1364 def __repr__(self): 

1365 return "Production(" + str(self) + ")" 

1366 

1367 def __len__(self): 

1368 return len(self.prod) 

1369 

1370 def __nonzero__(self): 

1371 return 1 

1372 

1373 def __getitem__(self, index): 

1374 return self.prod[index] 

1375 

1376 # Return the nth lr_item from the production (or None if at the end) 

1377 def lr_item(self, n): 

1378 if n > len(self.prod): 

1379 return None 

1380 p = LRItem(self, n) 

1381 # Precompute the list of productions immediately following. 

1382 try: 

1383 p.lr_after = self.Prodnames[p.prod[n + 1]] 

1384 except (IndexError, KeyError): 

1385 p.lr_after = [] 

1386 try: 

1387 p.lr_before = p.prod[n - 1] 

1388 except IndexError: 

1389 p.lr_before = None 

1390 return p 

1391 

1392 # Bind the production function name to a callable 

1393 def bind(self, pdict): 

1394 if self.func: 

1395 self.callable = pdict[self.func] 

1396 

1397 

1398# This class serves as a minimal standin for Production objects when 

1399# reading table data from files. It only contains information 

1400# actually used by the LR parsing engine, plus some additional 

1401# debugging information. 

1402class MiniProduction(object): 

1403 def __init__(self, str, name, len, func, file, line): 

1404 self.name = name 

1405 self.len = len 

1406 self.func = func 

1407 self.callable = None 

1408 self.file = file 

1409 self.line = line 

1410 self.str = str 

1411 

1412 def __str__(self): 

1413 return self.str 

1414 

1415 def __repr__(self): 

1416 return "MiniProduction(%s)" % self.str 

1417 

1418 # Bind the production function name to a callable 

1419 def bind(self, pdict): 

1420 if self.func: 

1421 self.callable = pdict[self.func] 

1422 

1423 

1424# ----------------------------------------------------------------------------- 

1425# class LRItem 

1426# 

1427# This class represents a specific stage of parsing a production rule. For 

1428# example: 

1429# 

1430# expr : expr . PLUS term 

1431# 

1432# In the above, the "." represents the current location of the parse. Here are the 

1433# basic attributes: 

1434# 

1435# name - Name of the production. For example 'expr' 

1436# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 

1437# number - Production number. 

1438# 

1439# lr_next - Next LR item. For example, if we are at 'expr -> expr . PLUS term' 

1440# then lr_next refers to 'expr -> expr PLUS . term' 

1441# lr_index - LR item index (location of the ".") in the prod list. 

1442# lookaheads - LALR lookahead symbols for this item 

1443# len - Length of the production (number of symbols on right hand side) 

1444# lr_after - List of all productions that immediately follow 

1445# lr_before - Grammar symbol immediately before 

1446# ----------------------------------------------------------------------------- 

1447 

1448 

1449class LRItem(object): 

1450 def __init__(self, p, n): 

1451 self.name = p.name 

1452 self.prod = list(p.prod) 

1453 self.number = p.number 

1454 self.lr_index = n 

1455 self.lookaheads = {} 

1456 self.prod.insert(n, ".") 

1457 self.prod = tuple(self.prod) 

1458 self.len = len(self.prod) 

1459 self.usyms = p.usyms 

1460 

1461 def __str__(self): 

1462 if self.prod: 

1463 s = "%s -> %s" % (self.name, " ".join(self.prod)) 

1464 else: 

1465 s = "%s -> <empty>" % self.name 

1466 return s 

1467 

1468 def __repr__(self): 

1469 return "LRItem(" + str(self) + ")" 

1470 

1471 

1472# ----------------------------------------------------------------------------- 

1473# rightmost_terminal() 

1474# 

1475# Return the rightmost terminal from a list of symbols. Used in add_production() 

1476# ----------------------------------------------------------------------------- 

1477def rightmost_terminal(symbols, terminals): 

1478 i = len(symbols) - 1 

1479 while i >= 0: 

1480 if symbols[i] in terminals: 

1481 return symbols[i] 

1482 i -= 1 

1483 return None 

1484 

1485 

1486# ----------------------------------------------------------------------------- 

1487# === GRAMMAR CLASS === 

1488# 

1489# The following class represents the contents of the specified grammar along 

1490# with various computed properties such as first sets, follow sets, LR items, etc. 

1491# This data is used for critical parts of the table generation process later. 

1492# ----------------------------------------------------------------------------- 

1493 

1494 

1495class GrammarError(YaccError): 

1496 pass 

1497 

1498 

1499class Grammar(object): 

1500 def __init__(self, terminals): 

1501 self.Productions = [None] # A list of all of the productions. The first 

1502 # entry is always reserved for the purpose of 

1503 # building an augmented grammar 

1504 

1505 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 

1506 # productions of that nonterminal. 

1507 

1508 self.Prodmap = {} # A dictionary that is only used to detect duplicate 

1509 # productions. 

1510 

1511 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 

1512 # list of the rules where they are used. 

1513 

1514 for term in terminals: 

1515 self.Terminals[term] = [] 

1516 

1517 self.Terminals["error"] = [] 

1518 

1519 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 

1520 # of rule numbers where they are used. 

1521 

1522 self.First = {} # A dictionary of precomputed FIRST(x) symbols 

1523 

1524 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 

1525 

1526 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 

1527 # form ('right',level) or ('nonassoc', level) or ('left',level) 

1528 

1529 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar. 

1530 # This is only used to provide error checking and to generate 

1531 # a warning about unused precedence rules. 

1532 

1533 self.Start = None # Starting symbol for the grammar 

1534 

1535 def __len__(self): 

1536 return len(self.Productions) 

1537 

1538 def __getitem__(self, index): 

1539 return self.Productions[index] 

1540 

1541 # ----------------------------------------------------------------------------- 

1542 # set_precedence() 

1543 # 

1544 # Sets the precedence for a given terminal. assoc is the associativity such as 

1545 # 'left','right', or 'nonassoc'. level is a numeric level. 

1546 # 

1547 # ----------------------------------------------------------------------------- 

1548 

1549 def set_precedence(self, term, assoc, level): 

1550 assert self.Productions == [None], "Must call set_precedence() before add_production()" 

1551 if term in self.Precedence: 

1552 raise GrammarError("Precedence already specified for terminal %r" % term) 

1553 if assoc not in ["left", "right", "nonassoc"]: 

1554 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 

1555 self.Precedence[term] = (assoc, level) 

1556 

1557 # ----------------------------------------------------------------------------- 

1558 # add_production() 

1559 # 

1560 # Given an action function, this function assembles a production rule and 

1561 # computes its precedence level. 

1562 # 

1563 # The production rule is supplied as a list of symbols. For example, 

1564 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 

1565 # symbols ['expr','PLUS','term']. 

1566 # 

1567 # Precedence is determined by the precedence of the right-most non-terminal 

1568 # or the precedence of a terminal specified by %prec. 

1569 # 

1570 # A variety of error checks are performed to make sure production symbols 

1571 # are valid and that %prec is used correctly. 

1572 # ----------------------------------------------------------------------------- 

1573 

1574 def add_production(self, prodname, syms, func=None, file="", line=0): 

1575 if prodname in self.Terminals: 

1576 raise GrammarError( 

1577 "%s:%d: Illegal rule name %r. Already defined as a token" % (file, line, prodname) 

1578 ) 

1579 if prodname == "error": 

1580 raise GrammarError( 

1581 "%s:%d: Illegal rule name %r. error is a reserved word" % (file, line, prodname) 

1582 ) 

1583 if not _is_identifier.match(prodname): 

1584 raise GrammarError("%s:%d: Illegal rule name %r" % (file, line, prodname)) 

1585 

1586 # Look for literal tokens 

1587 for n, s in enumerate(syms): 

1588 if s[0] in "'\"": 

1589 try: 

1590 c = eval(s) 

1591 if len(c) > 1: 

1592 raise GrammarError( 

1593 "%s:%d: Literal token %s in rule %r may only be a single character" 

1594 % (file, line, s, prodname) 

1595 ) 

1596 if c not in self.Terminals: 

1597 self.Terminals[c] = [] 

1598 syms[n] = c 

1599 continue 

1600 except SyntaxError: 

1601 pass 

1602 if not _is_identifier.match(s) and s != "%prec": 

1603 raise GrammarError("%s:%d: Illegal name %r in rule %r" % (file, line, s, prodname)) 

1604 

1605 # Determine the precedence level 

1606 if "%prec" in syms: 

1607 if syms[-1] == "%prec": 

1608 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file, line)) 

1609 if syms[-2] != "%prec": 

1610 raise GrammarError( 

1611 "%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file, line) 

1612 ) 

1613 precname = syms[-1] 

1614 prodprec = self.Precedence.get(precname) 

1615 if not prodprec: 

1616 raise GrammarError("%s:%d: Nothing known about the precedence of %r" % (file, line, precname)) 

1617 else: 

1618 self.UsedPrecedence.add(precname) 

1619 del syms[-2:] # Drop %prec from the rule 

1620 else: 

1621 # If no %prec, precedence is determined by the rightmost terminal symbol 

1622 precname = rightmost_terminal(syms, self.Terminals) 

1623 prodprec = self.Precedence.get(precname, ("right", 0)) 

1624 

1625 # See if the rule is already in the rulemap 

1626 map = "%s -> %s" % (prodname, syms) 

1627 if map in self.Prodmap: 

1628 m = self.Prodmap[map] 

1629 raise GrammarError( 

1630 "%s:%d: Duplicate rule %s. " % (file, line, m) 

1631 + "Previous definition at %s:%d" % (m.file, m.line) 

1632 ) 

1633 

1634 # From this point on, everything is valid. Create a new Production instance 

1635 pnumber = len(self.Productions) 

1636 if prodname not in self.Nonterminals: 

1637 self.Nonterminals[prodname] = [] 

1638 

1639 # Add the production number to Terminals and Nonterminals 

1640 for t in syms: 

1641 if t in self.Terminals: 

1642 self.Terminals[t].append(pnumber) 

1643 else: 

1644 if t not in self.Nonterminals: 

1645 self.Nonterminals[t] = [] 

1646 self.Nonterminals[t].append(pnumber) 

1647 

1648 # Create a production and add it to the list of productions 

1649 p = Production(pnumber, prodname, syms, prodprec, func, file, line) 

1650 self.Productions.append(p) 

1651 self.Prodmap[map] = p 

1652 

1653 # Add to the global productions list 

1654 try: 

1655 self.Prodnames[prodname].append(p) 

1656 except KeyError: 

1657 self.Prodnames[prodname] = [p] 

1658 

1659 # ----------------------------------------------------------------------------- 

1660 # set_start() 

1661 # 

1662 # Sets the starting symbol and creates the augmented grammar. Production 

1663 # rule 0 is S' -> start where start is the start symbol. 

1664 # ----------------------------------------------------------------------------- 

1665 

1666 def set_start(self, start=None): 

1667 if not start: 

1668 start = self.Productions[1].name 

1669 if start not in self.Nonterminals: 

1670 raise GrammarError("start symbol %s undefined" % start) 

1671 self.Productions[0] = Production(0, "S'", [start]) 

1672 self.Nonterminals[start].append(0) 

1673 self.Start = start 

1674 

1675 # ----------------------------------------------------------------------------- 

1676 # find_unreachable() 

1677 # 

1678 # Find all of the nonterminal symbols that can't be reached from the starting 

1679 # symbol. Returns a list of nonterminals that can't be reached. 

1680 # ----------------------------------------------------------------------------- 
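# A small hypothetical sketch: in the grammar
#
#     s : a        (s is the start symbol)
#     a : B
#     c : D
#
# only s, a, and B are reachable from the start, so find_unreachable()
# returns ['c'].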

1681 

1682 def find_unreachable(self): 

1683 # Mark all symbols that are reachable from a symbol s 

1684 def mark_reachable_from(s): 

1685 if s in reachable: 

1686 return 

1687 reachable.add(s) 

1688 for p in self.Prodnames.get(s, []): 

1689 for r in p.prod: 

1690 mark_reachable_from(r) 

1691 

1692 reachable = set() 

1693 mark_reachable_from(self.Productions[0].prod[0]) 

1694 return [s for s in self.Nonterminals if s not in reachable] 

1695 

1696 # ----------------------------------------------------------------------------- 

1697 # infinite_cycles() 

1698 # 

1699 # This function looks at the various parsing rules and tries to detect 

1700 # infinite recursion cycles (grammar rules where there is no possible way 

1701 # to derive a string of only terminals). 

1702 # ----------------------------------------------------------------------------- 
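# A hypothetical sketch: the lone rule
#
#     a : a PLUS a
#
# can never derive a string of only terminals (every production of 'a'
# mentions 'a' again), so infinite_cycles() would report 'a'.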

1703 

1704 def infinite_cycles(self): 

1705 terminates = {} 

1706 

1707 # Terminals: 

1708 for t in self.Terminals: 

1709 terminates[t] = True 

1710 

1711 terminates["$end"] = True 

1712 

1713 # Nonterminals: 

1714 

1715 # Initialize to false: 

1716 for n in self.Nonterminals: 

1717 terminates[n] = False 

1718 

1719 # Then propagate termination until no change: 

1720 while True: 

1721 some_change = False 

1722 for n, pl in self.Prodnames.items(): 

1723 # Nonterminal n terminates iff any of its productions terminates. 

1724 for p in pl: 

1725 # Production p terminates iff all of its rhs symbols terminate. 

1726 for s in p.prod: 

1727 if not terminates[s]: 

1728 # The symbol s does not terminate, 

1729 # so production p does not terminate. 

1730 p_terminates = False 

1731 break 

1732 else: 

1733 # didn't break from the loop, 

1734 # so every symbol s terminates 

1735 # so production p terminates. 

1736 p_terminates = True 

1737 

1738 if p_terminates: 

1739 # symbol n terminates! 

1740 if not terminates[n]: 

1741 terminates[n] = True 

1742 some_change = True 

1743 # Don't need to consider any more productions for this n. 

1744 break 

1745 

1746 if not some_change: 

1747 break 

1748 

1749 infinite = [] 

1750 for s, term in terminates.items(): 

1751 if not term: 

1752 if s not in self.Prodnames and s not in self.Terminals and s != "error": 

1753 # s is used-but-not-defined, and we've already warned of that, 

1754 # so it would be overkill to say that it's also non-terminating. 

1755 pass 

1756 else: 

1757 infinite.append(s) 

1758 

1759 return infinite 

1760 

1761 # ----------------------------------------------------------------------------- 

1762 # undefined_symbols() 

1763 # 

1764 # Find all symbols that were used in the grammar, but not defined as tokens or 

1765 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol 

1766 # and prod is the production where the symbol was used. 

1767 # ----------------------------------------------------------------------------- 

1768 def undefined_symbols(self): 

1769 result = [] 

1770 for p in self.Productions: 

1771 if not p: 

1772 continue 

1773 

1774 for s in p.prod: 

1775 if s not in self.Prodnames and s not in self.Terminals and s != "error": 

1776 result.append((s, p)) 

1777 return result 

1778 

1779 # ----------------------------------------------------------------------------- 

1780 # unused_terminals() 

1781 # 

1782 # Find all terminals that were defined, but not used by the grammar. Returns 

1783 # a list of all symbols. 

1784 # ----------------------------------------------------------------------------- 

1785 def unused_terminals(self): 

1786 unused_tok = [] 

1787 for s, v in self.Terminals.items(): 

1788 if s != "error" and not v: 

1789 unused_tok.append(s) 

1790 

1791 return unused_tok 

1792 

1793 # ------------------------------------------------------------------------------ 

1794 # unused_rules() 

1795 # 

1796 # Find all grammar rules that were defined, but not used (possibly because they are unreachable). 

1797 # Returns a list of productions. 

1798 # ------------------------------------------------------------------------------ 

1799 

1800 def unused_rules(self): 

1801 unused_prod = [] 

1802 for s, v in self.Nonterminals.items(): 

1803 if not v: 

1804 p = self.Prodnames[s][0] 

1805 unused_prod.append(p) 

1806 return unused_prod 

1807 

1808 # ----------------------------------------------------------------------------- 

1809 # unused_precedence() 

1810 # 

1811 # Returns a list of tuples (term,precedence) corresponding to precedence 

1812 # rules that were never used by the grammar. term is the name of the terminal 

1813 # on which precedence was applied and precedence is a string such as 'left' or 

1814 # 'right' corresponding to the type of precedence. 

1815 # ----------------------------------------------------------------------------- 

1816 

1817 def unused_precedence(self): 

1818 unused = [] 

1819 for termname in self.Precedence: 

1820 if not (termname in self.Terminals or termname in self.UsedPrecedence): 

1821 unused.append((termname, self.Precedence[termname][0])) 

1822 

1823 return unused 

1824 

1825 # ------------------------------------------------------------------------- 

1826 # _first() 

1827 # 

1828 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 

1829 # 

1830 # During execution of compute_first(), the result may be incomplete. 

1831 # Afterward (e.g., when called from compute_follow()), it will be complete. 

1832 # ------------------------------------------------------------------------- 
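# A worked sketch on a hypothetical grammar: given
#
#     c : <empty>
#     c : D
#
# so that First[c] == ['D', '<empty>'], calling _first(('c', 'B')) yields
# ['D', 'B'] -- since c can derive empty, the symbol B after it also
# contributes to the result.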

1833 def _first(self, beta): 

1834 # We are computing First(x1,x2,x3,...,xn) 

1835 result = [] 

1836 for x in beta: 

1837 x_produces_empty = False 

1838 

1839 # Add all the non-<empty> symbols of First[x] to the result. 

1840 for f in self.First[x]: 

1841 if f == "<empty>": 

1842 x_produces_empty = True 

1843 else: 

1844 if f not in result: 

1845 result.append(f) 

1846 

1847 if x_produces_empty: 

1848 # We have to consider the next x in beta, 

1849 # i.e. stay in the loop. 

1850 pass 

1851 else: 

1852 # We don't have to consider any further symbols in beta. 

1853 break 

1854 else: 

1855 # There was no 'break' from the loop, 

1856 # so x_produces_empty was true for all x in beta, 

1857 # so beta produces empty as well. 

1858 result.append("<empty>") 

1859 

1860 return result 

1861 

1862 # ------------------------------------------------------------------------- 

1863 # compute_first() 

1864 # 

1865 # Compute the value of FIRST1(X) for all symbols 

1866 # ------------------------------------------------------------------------- 

1867 def compute_first(self): 

1868 if self.First: 

1869 return self.First 

1870 

1871 # Terminals: 

1872 for t in self.Terminals: 

1873 self.First[t] = [t] 

1874 

1875 self.First["$end"] = ["$end"] 

1876 

1877 # Nonterminals: 

1878 

1879 # Initialize to the empty set: 

1880 for n in self.Nonterminals: 

1881 self.First[n] = [] 

1882 

1883 # Then propagate symbols until no change: 

1884 while True: 

1885 some_change = False 

1886 for n in self.Nonterminals: 

1887 for p in self.Prodnames[n]: 

1888 for f in self._first(p.prod): 

1889 if f not in self.First[n]: 

1890 self.First[n].append(f) 

1891 some_change = True 

1892 if not some_change: 

1893 break 

1894 

1895 return self.First 

1896 

1897 # --------------------------------------------------------------------- 

1898 # compute_follow() 

1899 # 

1900 # Computes all of the follow sets for every non-terminal symbol. The 

1901 # follow set is the set of all symbols that might follow a given 

1902 # non-terminal. See the Dragon book, 2nd Ed. p. 189. 

1903 # --------------------------------------------------------------------- 
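# A small worked sketch (hypothetical grammar): for
#
#     s : a B        (s is the start symbol)
#     a : C
#
# Follow(s) == ['$end'] and Follow(a) == ['B'], since B is the only
# terminal that can appear immediately after a.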

1904 def compute_follow(self, start=None): 

1905 # If already computed, return the result 

1906 if self.Follow: 

1907 return self.Follow 

1908 

1909 # If first sets not computed yet, do that first. 

1910 if not self.First: 

1911 self.compute_first() 

1912 

1913 # Add '$end' to the follow list of the start symbol 

1914 for k in self.Nonterminals: 

1915 self.Follow[k] = [] 

1916 

1917 if not start: 

1918 start = self.Productions[1].name 

1919 

1920 self.Follow[start] = ["$end"] 

1921 

1922 while True: 

1923 didadd = False 

1924 for p in self.Productions[1:]: 

1925 # Here is the production set 

1926 for i, B in enumerate(p.prod): 

1927 if B in self.Nonterminals: 

1928 # Okay. We got a non-terminal in a production 

1929 fst = self._first(p.prod[i + 1 :]) 

1930 hasempty = False 

1931 for f in fst: 

1932 if f != "<empty>" and f not in self.Follow[B]: 

1933 self.Follow[B].append(f) 

1934 didadd = True 

1935 if f == "<empty>": 

1936 hasempty = True 

1937 if hasempty or i == (len(p.prod) - 1): 

1938 # Add elements of follow(a) to follow(b) 

1939 for f in self.Follow[p.name]: 

1940 if f not in self.Follow[B]: 

1941 self.Follow[B].append(f) 

1942 didadd = True 

1943 if not didadd: 

1944 break 

1945 return self.Follow 

1946 

1947 # ----------------------------------------------------------------------------- 

1948 # build_lritems() 

1949 # 

1950 # This function walks the list of productions and builds a complete set of the 

1951 # LR items. The LR items are stored in two ways: First, they are uniquely 

1952 # numbered and placed in the list _lritems. Second, a linked list of LR items 

1953 # is built for each production. For example: 

1954 # 

1955 # E -> E PLUS E 

1956 # 

1957 # Creates the list 

1958 # 

1959 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 

1960 # ----------------------------------------------------------------------------- 

1961 

1962 def build_lritems(self): 

1963 for p in self.Productions: 

1964 lastlri = p 

1965 i = 0 

1966 lr_items = [] 

1967 while True: 

1968 if i > len(p): 

1969 lri = None 

1970 else: 

1971 lri = LRItem(p, i) 

1972 # Precompute the list of productions immediately following 

1973 try: 

1974 lri.lr_after = self.Prodnames[lri.prod[i + 1]] 

1975 except (IndexError, KeyError): 

1976 lri.lr_after = [] 

1977 try: 

1978 lri.lr_before = lri.prod[i - 1] 

1979 except IndexError: 

1980 lri.lr_before = None 

1981 

1982 lastlri.lr_next = lri 

1983 if not lri: 

1984 break 

1985 lr_items.append(lri) 

1986 lastlri = lri 

1987 i += 1 

1988 p.lr_items = lr_items 

1989 

1990 

1991# ----------------------------------------------------------------------------- 

1992# == Class LRTable == 

1993# 

1994 # This class represents a basic table of LR parsing information. 

1995# Methods for generating the tables are not defined here. They are defined 

1996# in the derived class LRGeneratedTable. 

1997# ----------------------------------------------------------------------------- 

1998 

1999 

2000class VersionError(YaccError): 

2001 pass 

2002 

2003 

2004class LRTable(object): 

2005 def __init__(self): 

2006 self.lr_action = None 

2007 self.lr_goto = None 

2008 self.lr_productions = None 

2009 self.lr_method = None 

2010 

2011 def read_table(self, module): 

2012 if isinstance(module, types.ModuleType): 

2013 parsetab = module 

2014 else: 

2015 exec("import %s" % module) 

2016 parsetab = sys.modules[module] 

2017 

2018 if parsetab._tabversion != __tabversion__: 

2019 raise VersionError("yacc table file version is out of date") 

2020 

2021 self.lr_action = parsetab._lr_action 

2022 self.lr_goto = parsetab._lr_goto 

2023 

2024 self.lr_productions = [] 

2025 for p in parsetab._lr_productions: 

2026 self.lr_productions.append(MiniProduction(*p)) 

2027 

2028 self.lr_method = parsetab._lr_method 

2029 return parsetab._lr_signature 

2030 

2031 def read_pickle(self, filename): 

2032 try: 

2033 import cPickle as pickle 

2034 except ImportError: 

2035 import pickle 

2036 

2037 if not os.path.exists(filename): 

2038 raise ImportError 

2039 

2040 in_f = open(filename, "rb") 

2041 

2042 tabversion = pickle.load(in_f) 

2043 if tabversion != __tabversion__: 

2044 raise VersionError("yacc table file version is out of date") 

2045 self.lr_method = pickle.load(in_f) 

2046 signature = pickle.load(in_f) 

2047 self.lr_action = pickle.load(in_f) 

2048 self.lr_goto = pickle.load(in_f) 

2049 productions = pickle.load(in_f) 

2050 

2051 self.lr_productions = [] 

2052 for p in productions: 

2053 self.lr_productions.append(MiniProduction(*p)) 

2054 

2055 in_f.close() 

2056 return signature 

2057 

2058 # Bind all production function names to callable objects in pdict 

2059 def bind_callables(self, pdict): 

2060 for p in self.lr_productions: 

2061 p.bind(pdict) 

2062 

2063 

2064# ----------------------------------------------------------------------------- 

2065# === LR Generator === 

2066# 

2067# The following classes and functions are used to generate LR parsing tables on 

2068# a grammar. 

2069# ----------------------------------------------------------------------------- 

2070 

2071# ----------------------------------------------------------------------------- 

2072# digraph() 

2073# traverse() 

2074# 

2075# The following two functions are used to compute set valued functions 

2076# of the form: 

2077# 

2078# F(x) = F'(x) U U{F(y) | x R y} 

2079# 

2080# This is used to compute the values of Read() sets as well as FOLLOW sets 

2081# in LALR(1) generation. 

2082# 

2083# Inputs: X - An input set 

2084# R - A relation 

2085# FP - Set-valued function 

2086# ------------------------------------------------------------------------------ 

2087 

2088 

2089def digraph(X, R, FP): 

2090 N = {} 

2091 for x in X: 

2092 N[x] = 0 

2093 stack = [] 

2094 F = {} 

2095 for x in X: 

2096 if N[x] == 0: 

2097 traverse(x, N, stack, F, X, R, FP) 

2098 return F 

2099 

2100 

2101def traverse(x, N, stack, F, X, R, FP): 

2102 stack.append(x) 

2103 d = len(stack) 

2104 N[x] = d 

2105 F[x] = FP(x) # F(X) <- F'(x) 

2106 

2107 rel = R(x) # Get y's related to x 

2108 for y in rel: 

2109 if N[y] == 0: 

2110 traverse(y, N, stack, F, X, R, FP) 

2111 N[x] = min(N[x], N[y]) 

2112 for a in F.get(y, []): 

2113 if a not in F[x]: 

2114 F[x].append(a) 

2115 if N[x] == d: 

2116 N[stack[-1]] = MAXINT 

2117 F[stack[-1]] = F[x] 

2118 element = stack.pop() 

2119 while element != x: 

2120 N[stack[-1]] = MAXINT 

2121 F[stack[-1]] = F[x] 

2122 element = stack.pop() 

2123 

2124 

2125class LALRError(YaccError): 

2126 pass 

2127 

2128 

2129# ----------------------------------------------------------------------------- 

2130# == LRGeneratedTable == 

2131# 

2132# This class implements the LR table generation algorithm. There are no 

2133# public methods except for write() 

2134# ----------------------------------------------------------------------------- 

2135 

2136 

2137class LRGeneratedTable(LRTable): 

2138 def __init__(self, grammar, method="LALR", log=None): 

2139 if method not in ["SLR", "LALR"]: 

2140 raise LALRError("Unsupported method %s" % method) 

2141 

2142 self.grammar = grammar 

2143 self.lr_method = method 

2144 

2145 # Set up the logger 

2146 if not log: 

2147 log = NullLogger() 

2148 self.log = log 

2149 

2150 # Internal attributes 

2151 self.lr_action = {} # Action table 

2152 self.lr_goto = {} # Goto table 

2153 self.lr_productions = grammar.Productions # Copy of grammar Production array 

2154 self.lr_goto_cache = {} # Cache of computed gotos 

2155 self.lr0_cidhash = {} # Cache of closures 

2156 

2157 self._add_count = 0 # Internal counter used to detect cycles 

2158 

2159 # Diagnostic information filled in by the table generator 

2160 self.sr_conflict = 0 

2161 self.rr_conflict = 0 

2162 self.conflicts = [] # List of conflicts 

2163 

2164 self.sr_conflicts = [] 

2165 self.rr_conflicts = [] 

2166 

2167 # Build the tables 

2168 self.grammar.build_lritems() 

2169 self.grammar.compute_first() 

2170 self.grammar.compute_follow() 

2171 self.lr_parse_table() 

2172 

2173 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 

2174 

2175 def lr0_closure(self, I): 

2176 self._add_count += 1 

2177 

2178 # Add everything in I to J 

2179 J = I[:] 

2180 didadd = True 

2181 while didadd: 

2182 didadd = False 

2183 for j in J: 

2184 for x in j.lr_after: 

2185 if getattr(x, "lr0_added", 0) == self._add_count: 

2186 continue 

2187 # Add B --> .G to J 

2188 J.append(x.lr_next) 

2189 x.lr0_added = self._add_count 

2190 didadd = True 

2191 

2192 return J 

2193 

2194 # Compute the LR(0) goto function goto(I,X) where I is a set 

2195 # of LR(0) items and X is a grammar symbol. This function is written 

2196 # in a way that guarantees uniqueness of the generated goto sets 

2197 # (i.e. the same goto set will never be returned as two different Python 

2198 # objects). With uniqueness, we can later do fast set comparisons using 

2199 # id(obj) instead of element-wise comparison. 

2200 

2201 def lr0_goto(self, I, x): 

2202 # First we look for a previously cached entry 

2203 g = self.lr_goto_cache.get((id(I), x)) 

2204 if g: 

2205 return g 

2206 

2207 # Now we generate the goto set in a way that guarantees uniqueness 

2208 # of the result 

2209 

2210 s = self.lr_goto_cache.get(x) 

2211 if not s: 

2212 s = {} 

2213 self.lr_goto_cache[x] = s 

2214 

2215 gs = [] 

2216 for p in I: 

2217 n = p.lr_next 

2218 if n and n.lr_before == x: 

2219 s1 = s.get(id(n)) 

2220 if not s1: 

2221 s1 = {} 

2222 s[id(n)] = s1 

2223 gs.append(n) 

2224 s = s1 

2225 g = s.get("$end") 

2226 if not g: 

2227 if gs: 

2228 g = self.lr0_closure(gs) 

2229 s["$end"] = g 

2230 else: 

2231 s["$end"] = gs 

2232 self.lr_goto_cache[(id(I), x)] = g 

2233 return g 

2234 

2235 # Compute the LR(0) sets of item function 

2236 def lr0_items(self): 

2237 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] 

2238 i = 0 

2239 for I in C: 

2240 self.lr0_cidhash[id(I)] = i 

2241 i += 1 

2242 

2243 # Loop over the items in C and each grammar symbols 

2244 i = 0 

2245 while i < len(C): 

2246 I = C[i] 

2247 i += 1 

2248 

2249 # Collect all of the symbols that could possibly be in the goto(I,X) sets 

2250 asyms = {} 

2251 for ii in I: 

2252 for s in ii.usyms: 

2253 asyms[s] = None 

2254 

2255 for x in asyms: 

2256 g = self.lr0_goto(I, x) 

2257 if not g or id(g) in self.lr0_cidhash: 

2258 continue 

2259 self.lr0_cidhash[id(g)] = len(C) 

2260 C.append(g) 

2261 

2262 return C 

2263 

2264 # ----------------------------------------------------------------------------- 

2265 # ==== LALR(1) Parsing ==== 

2266 # 

2267 # LALR(1) parsing is almost exactly the same as SLR except that instead of 

2268 # relying upon Follow() sets when performing reductions, a more selective 

2269 # lookahead set that incorporates the state of the LR(0) machine is utilized. 

2270 # Thus, we mainly just have to focus on calculating the lookahead sets. 

2271 # 

2272 # The method used here is due to DeRemer and Pennello (1982). 

2273 # 

2274 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) 

2275 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 

2276 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 

2277 # 

2278 # Further details can also be found in: 

2279 # 

2280 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 

2281 # McGraw-Hill Book Company, (1985). 

2282 # 

2283 # ----------------------------------------------------------------------------- 

2284 

2285 # ----------------------------------------------------------------------------- 

2286 # compute_nullable_nonterminals() 

2287 # 

2288 # Creates a dictionary containing all of the non-terminals that might produce 

2289 # an empty production. 

2290 # ----------------------------------------------------------------------------- 
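# A hypothetical sketch: given the rules
#
#     a : <empty>
#     b : a a
#
# 'a' is marked nullable because it has an empty production, and 'b' is
# then marked nullable because every symbol on its right-hand side is.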

2291 

2292 def compute_nullable_nonterminals(self): 

2293 nullable = set() 

2294 num_nullable = 0 

2295 while True: 

2296 for p in self.grammar.Productions[1:]: 

2297 if p.len == 0: 

2298 nullable.add(p.name) 

2299 continue 

2300 for t in p.prod: 

2301 if t not in nullable: 

2302 break 

2303 else: 

2304 nullable.add(p.name) 

2305 if len(nullable) == num_nullable: 

2306 break 

2307 num_nullable = len(nullable) 

2308 return nullable 

2309 

2310 # ----------------------------------------------------------------------------- 

2311 # find_nonterminal_trans(C) 

2312 # 

2313 # Given a set of LR(0) items, this function finds all of the non-terminal 

2314 # transitions. These are transitions in which a dot appears immediately before 

2315 # a non-terminal. Returns a list of tuples of the form (state,N) where state 

2316 # is the state number and N is the nonterminal symbol. 

2317 # 

2318 # The input C is the set of LR(0) items. 

2319 # ----------------------------------------------------------------------------- 

2320 

2321 def find_nonterminal_transitions(self, C): 

2322 trans = [] 

2323 for stateno, state in enumerate(C): 

2324 for p in state: 

2325 if p.lr_index < p.len - 1: 

2326 t = (stateno, p.prod[p.lr_index + 1]) 

2327 if t[1] in self.grammar.Nonterminals: 

2328 if t not in trans: 

2329 trans.append(t) 

2330 return trans 

2331 

2332 # ----------------------------------------------------------------------------- 

2333 # dr_relation() 

2334 # 

2335 # Computes the DR(p,A) relationships for non-terminal transitions. The input 

2336 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 

2337 # 

2338 # Returns a list of terminals. 

2339 # ----------------------------------------------------------------------------- 

2340 

2341 def dr_relation(self, C, trans, nullable): 

2342 state, N = trans 

2343 terms = [] 

2344 

2345 g = self.lr0_goto(C[state], N) 

2346 for p in g: 

2347 if p.lr_index < p.len - 1: 

2348 a = p.prod[p.lr_index + 1] 

2349 if a in self.grammar.Terminals: 

2350 if a not in terms: 

2351 terms.append(a) 

2352 

2353 # This extra bit is to handle the start state 

2354 if state == 0 and N == self.grammar.Productions[0].prod[0]: 

2355 terms.append("$end") 

2356 

2357 return terms 

2358 

2359 # ----------------------------------------------------------------------------- 

2360 # reads_relation() 

2361 # 

2362 # Computes the READS() relation (p,A) READS (t,C). 

2363 # ----------------------------------------------------------------------------- 

2364 

2365 def reads_relation(self, C, trans, empty): 

2366 # Look for empty transitions 

2367 rel = [] 

2368 state, N = trans 

2369 

2370 g = self.lr0_goto(C[state], N) 

2371 j = self.lr0_cidhash.get(id(g), -1) 

2372 for p in g: 

2373 if p.lr_index < p.len - 1: 

2374 a = p.prod[p.lr_index + 1] 

2375 if a in empty: 

2376 rel.append((j, a)) 

2377 

2378 return rel 

2379 

2380 # ----------------------------------------------------------------------------- 

2381 # compute_lookback_includes() 

2382 # 

2383 # Determines the lookback and includes relations 

2384 # 

2385 # LOOKBACK: 

2386 # 

2387 # This relation is determined by running the LR(0) state machine forward. 

2388 # For example, starting with a production "N : . A B C", we run it forward 

2389 # to obtain "N : A B C ." We then build a relationship between this final 

2390 # state and the starting state. These relationships are stored in a dictionary 

2391 # lookdict. 

2392 # 

2393 # INCLUDES: 

2394 # 

2395 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 

2396 # 

2397 # This relation is used to determine non-terminal transitions that occur 

2398 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 

2399 # if the following holds: 

2400 # 

2401 # B -> LAT, where T -> epsilon and p' -L-> p 

2402 # 

2403 # L is essentially a prefix (which may be empty), T is a suffix that must be 

2404 # able to derive an empty string. State p' must lead to state p with the string L. 

2405 # 

2406 # ----------------------------------------------------------------------------- 

2407 

2408 def compute_lookback_includes(self, C, trans, nullable): 

2409 lookdict = {} # Dictionary of lookback relations 

2410 includedict = {} # Dictionary of include relations 

2411 

2412 # Make a dictionary of non-terminal transitions 

2413 dtrans = {} 

2414 for t in trans: 

2415 dtrans[t] = 1 

2416 

2417 # Loop over all transitions and compute lookbacks and includes 

2418 for state, N in trans: 

2419 lookb = [] 

2420 includes = [] 

2421 for p in C[state]: 

2422 if p.name != N: 

2423 continue 

2424 

2425 # Okay, we have a name match. We now follow the production all the way 

2426 # through the state machine until we get the . on the right hand side 

2427 

2428 lr_index = p.lr_index 

2429 j = state 

2430 while lr_index < p.len - 1: 

2431 lr_index = lr_index + 1 

2432 t = p.prod[lr_index] 

2433 

2434 # Check to see if this symbol and state are a non-terminal transition 

2435 if (j, t) in dtrans: 

2436 # Yes. Okay, there is some chance that this is an includes relation; 

2437 # the only way to know for certain is whether the rest of the 

2438 # production derives empty. 

2439 

2440 li = lr_index + 1 

2441 while li < p.len: 

2442 if p.prod[li] in self.grammar.Terminals: 

2443 break # No, forget it 

2444 if p.prod[li] not in nullable: 

2445 break 

2446 li = li + 1 

2447 else: 

2448 # Appears to be a relation between (j,t) and (state,N) 

2449 includes.append((j, t)) 

2450 

2451 g = self.lr0_goto(C[j], t) # Go to next set 

2452 j = self.lr0_cidhash.get(id(g), -1) # Go to next state 

2453 

2454 # When we get here, j is the final state; now we have to locate the production 

2455 for r in C[j]: 

2456 if r.name != p.name: 

2457 continue 

2458 if r.len != p.len: 

2459 continue 

2460 i = 0 

2461 # This loop is comparing a production ". A B C" with "A B C ." 

2462 while i < r.lr_index: 

2463 if r.prod[i] != p.prod[i + 1]: 

2464 break 

2465 i = i + 1 

2466 else: 

2467 lookb.append((j, r)) 

2468 for i in includes: 

2469 if i not in includedict: 

2470 includedict[i] = [] 

2471 includedict[i].append((state, N)) 

2472 lookdict[(state, N)] = lookb 

2473 

2474 return lookdict, includedict 

2475 

2476 # ----------------------------------------------------------------------------- 

2477 # compute_read_sets() 

2478 # 

2479 # Given a set of LR(0) items, this function computes the read sets. 

2480 # 

2481 # Inputs: C = Set of LR(0) items 

2482 # ntrans = Set of nonterminal transitions 

2483 # nullable = Set of nullable nonterminals 

2484 # 

2485 # Returns a dictionary containing the read sets 

2486 # ----------------------------------------------------------------------------- 

2487 

2488 def compute_read_sets(self, C, ntrans, nullable): 

2489 FP = lambda x: self.dr_relation(C, x, nullable) 

2490 R = lambda x: self.reads_relation(C, x, nullable) 

2491 F = digraph(ntrans, R, FP) 

2492 return F 

2493 

2494 # ----------------------------------------------------------------------------- 

2495 # compute_follow_sets() 

2496 # 

2497 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 

2498 # and an include set, this function computes the follow sets 

2499 # 

2500 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 

2501 # 

2502 # Inputs: 

2503 # ntrans = Set of nonterminal transitions 

2504 # readsets = Readset (previously computed) 

2505 # inclsets = Include sets (previously computed) 

2506 # 

2507 # Returns a dictionary containing the follow sets 

2508 # ----------------------------------------------------------------------------- 

2509 

2510 def compute_follow_sets(self, ntrans, readsets, inclsets): 

2511 FP = lambda x: readsets[x] 

2512 R = lambda x: inclsets.get(x, []) 

2513 F = digraph(ntrans, R, FP) 

2514 return F 

2515 

2516 # ----------------------------------------------------------------------------- 

2517 # add_lookaheads() 

2518 # 

2519 # Attaches the lookahead symbols to grammar rules. 

2520 # 

2521 # Inputs: lookbacks - Set of lookback relations 

2522 # followset - Computed follow set 

2523 # 

2524 # This function directly attaches the lookaheads to productions contained 

2525 # in the lookbacks set 

2526 # ----------------------------------------------------------------------------- 

2527 

2528 def add_lookaheads(self, lookbacks, followset): 

2529 for trans, lb in lookbacks.items(): 

2530 # Loop over productions in lookback 

2531 for state, p in lb: 

2532 if state not in p.lookaheads: 

2533 p.lookaheads[state] = [] 

2534 f = followset.get(trans, []) 

2535 for a in f: 

2536 if a not in p.lookaheads[state]: 

2537 p.lookaheads[state].append(a) 

2538 

2539 # ----------------------------------------------------------------------------- 

2540 # add_lalr_lookaheads() 

2541 # 

2542 # This function does all of the work of adding lookahead information for use 

2543 # with LALR parsing 

2544 # ----------------------------------------------------------------------------- 

2545 

2546 def add_lalr_lookaheads(self, C): 

2547 # Determine all of the nullable nonterminals 

2548 nullable = self.compute_nullable_nonterminals() 

2549 

2550 # Find all non-terminal transitions 

2551 trans = self.find_nonterminal_transitions(C) 

2552 

2553 # Compute read sets 

2554 readsets = self.compute_read_sets(C, trans, nullable) 

2555 

2556 # Compute lookback/includes relations 

2557 lookd, included = self.compute_lookback_includes(C, trans, nullable) 

2558 

2559 # Compute LALR FOLLOW sets 

2560 followsets = self.compute_follow_sets(trans, readsets, included) 

2561 

2562 # Add all of the lookaheads 

2563 self.add_lookaheads(lookd, followsets) 

2564 

2565 # ----------------------------------------------------------------------------- 

2566 # lr_parse_table() 

2567 # 

2568 # This function constructs the parse tables for SLR or LALR 

2569 # ----------------------------------------------------------------------------- 
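# A sketch of the conflict resolution performed below (hypothetical
# grammar): with precedence = [('left', 'PLUS')], a state containing
# "expr : expr PLUS expr ." on lookahead PLUS resolves the shift/reduce
# conflict as a reduce, because the token and the rule share the same
# precedence level and the rule is left-associative.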

2570 def lr_parse_table(self): 

2571 Productions = self.grammar.Productions 

2572 Precedence = self.grammar.Precedence 

2573 goto = self.lr_goto # Goto array 

2574 action = self.lr_action # Action array 

2575 log = self.log # Logger for output 

2576 

2577 actionp = {} # Action production array (temporary) 

2578 

2579 log.info("Parsing method: %s", self.lr_method) 

2580 

2581 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 

2582 # This determines the number of states 

2583 

2584 C = self.lr0_items() 

2585 

2586 if self.lr_method == "LALR": 

2587 self.add_lalr_lookaheads(C) 

2588 

2589 # Build the parser table, state by state 

2590 st = 0 

2591 for I in C: 

2592 # Loop over each production in I 

2593 actlist = [] # List of actions 

2594 st_action = {} 

2595 st_actionp = {} 

2596 st_goto = {} 

2597 log.info("") 

2598 log.info("state %d", st) 

2599 log.info("") 

2600 for p in I: 

2601 log.info(" (%d) %s", p.number, p) 

2602 log.info("") 

2603 

2604 for p in I: 

2605 if p.len == p.lr_index + 1: 

2606 if p.name == "S'": 

2607 # Start symbol. Accept! 

2608 st_action["$end"] = 0 

2609 st_actionp["$end"] = p 

2610 else: 

2611 # We are at the end of a production. Reduce! 

2612 if self.lr_method == "LALR": 

2613 laheads = p.lookaheads[st] 

2614 else: 

2615 laheads = self.grammar.Follow[p.name] 

2616 for a in laheads: 

2617 actlist.append((a, p, "reduce using rule %d (%s)" % (p.number, p))) 

2618 r = st_action.get(a) 

2619 if r is not None: 

2620 # Whoa. Have a shift/reduce or reduce/reduce conflict 

2621 if r > 0: 

2622 # Need to decide on shift or reduce here. 

2623 # By default we favor shifting, but the 

2624 # precedence rules below may override that. 

2625 

2626 # Shift precedence comes from the token 

2627 sprec, slevel = Precedence.get(a, ("right", 0)) 

2628 

2629 # Reduce precedence comes from rule being reduced (p) 

2630 rprec, rlevel = Productions[p.number].prec 

2631 

2632 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == "left")): 

2633 # We really need to reduce here. 

2634 st_action[a] = -p.number 

2635 st_actionp[a] = p 

2636 if not slevel and not rlevel: 

2637 log.info(" ! shift/reduce conflict for %s resolved as reduce", a) 

2638 self.sr_conflicts.append((st, a, "reduce")) 

2639 Productions[p.number].reduced += 1 

2640 elif (slevel == rlevel) and (rprec == "nonassoc"): 

2641 st_action[a] = None 

2642 else: 

2643 # Hmmm. Guess we'll keep the shift 

2644 if not rlevel: 

2645 log.info(" ! shift/reduce conflict for %s resolved as shift", a) 

2646 self.sr_conflicts.append((st, a, "shift")) 

2647 elif r < 0: 

2648 # Reduce/reduce conflict. In this case, we favor the rule 

2649 # that was defined first in the grammar file 

2650 oldp = Productions[-r] 

2651 pp = Productions[p.number] 

2652 if oldp.line > pp.line: 

2653 st_action[a] = -p.number 

2654 st_actionp[a] = p 

2655 chosenp, rejectp = pp, oldp 

2656 Productions[p.number].reduced += 1 

2657 Productions[oldp.number].reduced -= 1 

2658 else: 

2659 chosenp, rejectp = oldp, pp 

2660 self.rr_conflicts.append((st, chosenp, rejectp)) 

2661 log.info( 

2662 " ! reduce/reduce conflict for %s resolved using rule %d (%s)", 

2663 a, 

2664 st_actionp[a].number, 

2665 st_actionp[a], 

2666 ) 

2667 else: 

2668 raise LALRError("Unknown conflict in state %d" % st) 

2669 else: 

2670 st_action[a] = -p.number 

2671 st_actionp[a] = p 

2672 Productions[p.number].reduced += 1 

2673 else: 

2674 i = p.lr_index 

2675 a = p.prod[i + 1] # Get symbol right after the "." 

2676 if a in self.grammar.Terminals: 

2677 g = self.lr0_goto(I, a) 

2678 j = self.lr0_cidhash.get(id(g), -1) 

2679 if j >= 0: 

2680 # We are in a shift state 

2681 actlist.append((a, p, "shift and go to state %d" % j)) 

2682 r = st_action.get(a) 

2683 if r is not None: 

2684 # Whoa have a shift/reduce or shift/shift conflict 

2685 if r > 0: 

2686 if r != j: 

2687 raise LALRError("Shift/shift conflict in state %d" % st) 

2688 elif r < 0: 

2689 # Do a precedence check. 

2690 # - if precedence of reduce rule is higher, we reduce. 

2691 # - if precedence of reduce is same and left assoc, we reduce. 

2692 # - otherwise we shift 

2693 

2694 # Shift precedence comes from the token 

2695 sprec, slevel = Precedence.get(a, ("right", 0)) 

2696 

2697 # Reduce precedence comes from the rule that could have been reduced 

2698 rprec, rlevel = Productions[st_actionp[a].number].prec 

2699 

2700 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == "right")): 

2701 # We decide to shift here... highest precedence to shift 

2702 Productions[st_actionp[a].number].reduced -= 1 

2703 st_action[a] = j 

2704 st_actionp[a] = p 

2705 if not rlevel: 

2706 log.info(" ! shift/reduce conflict for %s resolved as shift", a) 

2707 self.sr_conflicts.append((st, a, "shift")) 

2708 elif (slevel == rlevel) and (rprec == "nonassoc"): 

2709 st_action[a] = None 

2710 else: 

2711 # Hmmm. Guess we'll keep the reduce 

2712 if not slevel and not rlevel: 

2713 log.info(" ! shift/reduce conflict for %s resolved as reduce", a) 

2714 self.sr_conflicts.append((st, a, "reduce")) 

2715 

2716 else: 

2717 raise LALRError("Unknown conflict in state %d" % st) 

2718 else: 

2719 st_action[a] = j 

2720 st_actionp[a] = p 

2721 

2722 # Print the actions associated with each terminal 

2723 _actprint = {} 

2724 for a, p, m in actlist: 

2725 if a in st_action: 

2726 if p is st_actionp[a]: 

2727 log.info(" %-15s %s", a, m) 

2728 _actprint[(a, m)] = 1 

2729 log.info("") 

2730 # Print the actions that were not used. (debugging) 

2731 not_used = 0 

2732 for a, p, m in actlist: 

2733 if a in st_action: 

2734 if p is not st_actionp[a]: 

2735 if (a, m) not in _actprint: 

2736 log.debug(" ! %-15s [ %s ]", a, m) 

2737 not_used = 1 

2738 _actprint[(a, m)] = 1 

2739 if not_used: 

2740 log.debug("") 

2741 

2742 # Construct the goto table for this state 

2743 

2744 nkeys = {} 

2745 for ii in I: 

2746 for s in ii.usyms: 

2747 if s in self.grammar.Nonterminals: 

2748 nkeys[s] = None 

2749 for n in nkeys: 

2750 g = self.lr0_goto(I, n) 

2751 j = self.lr0_cidhash.get(id(g), -1) 

2752 if j >= 0: 

2753 st_goto[n] = j 

2754 log.info(" %-30s shift and go to state %d", n, j) 

2755 

2756 action[st] = st_action 

2757 actionp[st] = st_actionp 

2758 goto[st] = st_goto 

2759 st += 1 

2760 

2761 # ----------------------------------------------------------------------------- 

2762 # write() 

2763 # 

2764 # This function writes the LR parsing tables to a file 

2765 # ----------------------------------------------------------------------------- 
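# A hypothetical usage sketch (names are illustrative):
#
#     lr = LRGeneratedTable(grammar)
#     lr.write_table('parser_tables', outputdir='/tmp', signature=sig)
#
# This produces /tmp/parser_tables.py defining _lr_action, _lr_goto and
# _lr_productions, which LRTable.read_table() can later load back.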

2766 

2767 def write_table(self, tabmodule, outputdir="", signature=""): 

2768 if isinstance(tabmodule, types.ModuleType): 

2769 raise IOError("Won't overwrite existing tabmodule") 

2770 

2771 basemodulename = tabmodule.split(".")[-1] 

2772 filename = os.path.join(outputdir, basemodulename) + ".py" 

2773 try: 

2774 f = open(filename, "w") 

2775 

2776 f.write( 

2777 """ 

2778# %s 

2779# This file is automatically generated. Do not edit. 

2780# pylint: disable=W,C,R 

2781_tabversion = %r 

2782 

2783_lr_method = %r 

2784 

2785_lr_signature = %r 

2786 """ 

2787 % (os.path.basename(filename), __tabversion__, self.lr_method, signature) 

2788 ) 

2789 

2790 # Change smaller to 0 to go back to original tables 

2791 smaller = 1 

2792 

2793 # Factor out names to try to make the output smaller 

2794 if smaller: 

2795 items = {} 

2796 

2797 for s, nd in self.lr_action.items(): 

2798 for name, v in nd.items(): 

2799 i = items.get(name) 

2800 if not i: 

2801 i = ([], []) 

2802 items[name] = i 

2803 i[0].append(s) 

2804 i[1].append(v) 

2805 

2806 f.write("\n_lr_action_items = {") 

2807 for k, v in items.items(): 

2808 f.write("%r:([" % k) 

2809 for i in v[0]: 

2810 f.write("%r," % i) 

2811 f.write("],[") 

2812 for i in v[1]: 

2813 f.write("%r," % i) 

2814 

2815 f.write("]),") 

2816 f.write("}\n") 

2817 

2818 f.write( 

2819 """ 

2820_lr_action = {} 

2821for _k, _v in _lr_action_items.items(): 

2822 for _x,_y in zip(_v[0],_v[1]): 

2823 if not _x in _lr_action: _lr_action[_x] = {} 

2824 _lr_action[_x][_k] = _y 

2825del _lr_action_items 

2826""" 

2827 ) 

2828 

2829 else: 

2830 f.write("\n_lr_action = { ") 

2831 for k, v in self.lr_action.items(): 

2832 f.write("(%r,%r):%r," % (k[0], k[1], v)) 

2833 f.write("}\n") 

2834 

2835 if smaller: 

2836 # Factor out names to try to make the output smaller 

2837 items = {} 

2838 

2839 for s, nd in self.lr_goto.items(): 

2840 for name, v in nd.items(): 

2841 i = items.get(name) 

2842 if not i: 

2843 i = ([], []) 

2844 items[name] = i 

2845 i[0].append(s) 

2846 i[1].append(v) 

2847 

2848 f.write("\n_lr_goto_items = {") 

2849 for k, v in items.items(): 

2850 f.write("%r:([" % k) 

2851 for i in v[0]: 

2852 f.write("%r," % i) 

2853 f.write("],[") 

2854 for i in v[1]: 

2855 f.write("%r," % i) 

2856 

2857 f.write("]),") 

2858 f.write("}\n") 

2859 

2860 f.write( 

2861 """ 

2862_lr_goto = {} 

2863for _k, _v in _lr_goto_items.items(): 

2864 for _x, _y in zip(_v[0], _v[1]): 

2865 if not _x in _lr_goto: _lr_goto[_x] = {} 

2866 _lr_goto[_x][_k] = _y 

2867del _lr_goto_items 

2868""" 

2869 ) 

2870 else: 

2871 f.write("\n_lr_goto = { ") 

2872 for k, v in self.lr_goto.items(): 

2873 f.write("(%r,%r):%r," % (k[0], k[1], v)) 

2874 f.write("}\n") 

2875 

2876 # Write production table 

2877 f.write("_lr_productions = [\n") 

2878 for p in self.lr_productions: 

2879 if p.func: 

2880 f.write( 

2881 " (%r,%r,%d,%r,%r,%d),\n" 

2882 % (p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line) 

2883 ) 

2884 else: 

2885 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p), p.name, p.len)) 

2886 f.write("]\n") 

2887 f.close() 

2888 

2889 except IOError: 

2890 raise 

2891 

2892 # ----------------------------------------------------------------------------- 

2893 # pickle_table() 

2894 # 

2895 # This function pickles the LR parsing tables to a supplied file object 

2896 # ----------------------------------------------------------------------------- 

2897 

2898 def pickle_table(self, filename, signature=""): 

2899 try: 

2900 import cPickle as pickle 

2901 except ImportError: 

2902 import pickle 

2903 with open(filename, "wb") as outf: 

2904 pickle.dump(__tabversion__, outf, pickle_protocol) 

2905 pickle.dump(self.lr_method, outf, pickle_protocol) 

2906 pickle.dump(signature, outf, pickle_protocol) 

2907 pickle.dump(self.lr_action, outf, pickle_protocol) 

2908 pickle.dump(self.lr_goto, outf, pickle_protocol) 

2909 

2910 outp = [] 

2911 for p in self.lr_productions: 

2912 if p.func: 

2913 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) 

2914 else: 

2915 outp.append((str(p), p.name, p.len, None, None, None)) 

2916 pickle.dump(outp, outf, pickle_protocol) 

2917 

2918 

2919# ----------------------------------------------------------------------------- 

2920# === INTROSPECTION === 

2921# 

2922# The following functions and classes are used to implement the PLY 

2923# introspection features followed by the yacc() function itself. 

2924# ----------------------------------------------------------------------------- 

2925 

2926# ----------------------------------------------------------------------------- 

2927# get_caller_module_dict() 

2928# 

2929# This function returns a dictionary containing all of the symbols defined within 

2930# a caller further down the call stack. This is used to get the environment 

2931# associated with the yacc() call if none was provided. 

2932# ----------------------------------------------------------------------------- 

2933 

2934 

2935def get_caller_module_dict(levels): 

2936 f = sys._getframe(levels) 

2937 ldict = f.f_globals.copy() 

2938 if f.f_globals != f.f_locals: 

2939 ldict.update(f.f_locals) 

2940 return ldict 

2941 

2942 

2943# ----------------------------------------------------------------------------- 

2944# parse_grammar() 

2945# 

2946# This takes a raw grammar rule string and parses it into production data 

2947# ----------------------------------------------------------------------------- 

2948def parse_grammar(doc, file, line): 

2949 grammar = [] 

2950 # Split the doc string into lines 

2951 pstrings = doc.splitlines() 

2952 lastp = None 

2953 dline = line 

2954 for ps in pstrings: 

2955 dline += 1 

2956 p = ps.split() 

2957 if not p: 

2958 continue 

2959 try: 

2960 if p[0] == "|": 

2961 # This is a continuation of a previous rule 

2962 if not lastp: 

2963 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 

2964 prodname = lastp 

2965 syms = p[1:] 

2966 else: 

2967 prodname = p[0] 

2968 lastp = prodname 

2969 syms = p[2:] 

2970 assign = p[1] 

2971 if assign != ":" and assign != "::=": 

2972 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) 

2973 

2974 grammar.append((file, dline, prodname, syms)) 

2975 except SyntaxError: 

2976 raise 

2977 except Exception: 

2978 raise SyntaxError("%s:%d: Syntax error in rule %r" % (file, dline, ps.strip())) 

2979 

2980 return grammar 

2981 

2982 

2983# ----------------------------------------------------------------------------- 

2984# ParserReflect() 

2985# 

2986# This class represents information extracted for building a parser including 

2987# start symbol, error function, tokens, precedence list, action functions, 

2988# etc. 

2989# ----------------------------------------------------------------------------- 

2990class ParserReflect(object): 

2991 def __init__(self, pdict, log=None): 

2992 self.pdict = pdict 

2993 self.start = None 

2994 self.error_func = None 

2995 self.tokens = None 

2996 self.modules = set() 

2997 self.grammar = [] 

2998 self.error = False 

2999 

3000 if log is None: 

3001 self.log = PlyLogger(sys.stderr) 

3002 else: 

3003 self.log = log 

3004 

3005 # Get all of the basic information 

3006 def get_all(self): 

3007 self.get_start() 

3008 self.get_error_func() 

3009 self.get_tokens() 

3010 self.get_precedence() 

3011 self.get_pfunctions() 

3012 

3013 # Validate all of the information 

3014 def validate_all(self): 

3015 self.validate_start() 

3016 self.validate_error_func() 

3017 self.validate_tokens() 

3018 self.validate_precedence() 

3019 self.validate_pfunctions() 

3020 self.validate_modules() 

3021 return self.error 

3022 

3023 # Compute a signature over the grammar 

3024 def signature(self): 

3025 parts = [] 

3026 try: 

3027 if self.start: 

3028 parts.append(self.start) 

3029 if self.prec: 

3030 parts.append("".join(["".join(p) for p in self.prec])) 

3031 if self.tokens: 

3032 parts.append(" ".join(self.tokens)) 

3033 for f in self.pfuncs: 

3034 if f[3]: 

3035 parts.append(f[3]) 

3036 except (TypeError, ValueError): 

3037 pass 

3038 return "".join(parts) 

3039 

3040 # ----------------------------------------------------------------------------- 

3041 # validate_modules() 

3042 # 

3043 # This method checks to see if there are duplicated p_rulename() functions 

3044 # in the parser module file. Without this function, it is really easy for 

3045 # users to make mistakes by cutting and pasting code fragments (and it's a real 

3046 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 

3047 # we just do a little regular expression pattern matching of def statements 

3048 # to try and detect duplicates. 

3049 # ----------------------------------------------------------------------------- 
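# For example, pasting a rule twice:
#
#     def p_term(p): ...
#     def p_term(p): ...
#
# Python silently keeps only the second definition, so scanning the source
# text for duplicate "def p_*(" lines is the only way to warn about it.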

3050 

3051 def validate_modules(self): 

3052 # Match def p_funcname( 

3053 fre = re.compile(r"\s*def\s+(p_[a-zA-Z_0-9]*)\(") 

3054 

3055 for module in self.modules: 

3056 try: 

3057 lines, linen = inspect.getsourcelines(module) 

3058 except IOError: 

3059 continue 

3060 

3061 counthash = {} 

3062 for linen, line in enumerate(lines): 

3063 linen += 1 

3064 m = fre.match(line) 

3065 if m: 

3066 name = m.group(1) 

3067 prev = counthash.get(name) 

3068 if not prev: 

3069 counthash[name] = linen 

3070 else: 

3071 filename = inspect.getsourcefile(module) 

3072 self.log.warning( 

3073 "%s:%d: Function %s redefined. Previously defined on line %d", 

3074 filename, 

3075 linen, 

3076 name, 

3077 prev, 

3078 ) 

3079 

3080 # Get the start symbol 

3081 def get_start(self): 

3082 self.start = self.pdict.get("start") 

3083 

3084 # Validate the start symbol 

3085 def validate_start(self): 

3086 if self.start is not None: 

3087 if not isinstance(self.start, string_types): 

3088 self.log.error("'start' must be a string") 

3089 

3090 # Look for error handler 

3091 def get_error_func(self): 

3092 self.error_func = self.pdict.get("p_error") 

3093 

3094 # Validate the error function 

3095 def validate_error_func(self): 

3096 if self.error_func: 

3097 if isinstance(self.error_func, types.FunctionType): 

3098 ismethod = 0 

3099 elif isinstance(self.error_func, types.MethodType): 

3100 ismethod = 1 

3101 else: 

3102 self.log.error("'p_error' defined, but is not a function or method") 

3103 self.error = True 

3104 return 

3105 

3106 eline = self.error_func.__code__.co_firstlineno 

3107 efile = self.error_func.__code__.co_filename 

3108 module = inspect.getmodule(self.error_func) 

3109 self.modules.add(module) 

3110 

3111 argcount = self.error_func.__code__.co_argcount - ismethod 

3112 if argcount != 1: 

3113 self.log.error("%s:%d: p_error() requires 1 argument", efile, eline) 

3114 self.error = True 

3115 

3116 # Get the tokens map 

3117 def get_tokens(self): 

3118 tokens = self.pdict.get("tokens") 

3119 if not tokens: 

3120 self.log.error("No token list is defined") 

3121 self.error = True 

3122 return 

3123 

3124 if not isinstance(tokens, (list, tuple)): 

3125 self.log.error("tokens must be a list or tuple") 

3126 self.error = True 

3127 return 

3128 

3129 if not tokens: 

3130 self.log.error("tokens is empty") 

3131 self.error = True 

3132 return 

3133 

3134 self.tokens = sorted(tokens) 

3135 

3136 # Validate the tokens 

3137 def validate_tokens(self): 

3138 # Validate the tokens. 

3139 if "error" in self.tokens: 

3140 self.log.error("Illegal token name 'error'. Is a reserved word") 

3141 self.error = True 

3142 return 

3143 

3144 terminals = set() 

3145 for n in self.tokens: 

3146 if n in terminals: 

3147 self.log.warning("Token %r multiply defined", n) 

3148 terminals.add(n) 

3149 

3150 # Get the precedence map (if any) 

3151 def get_precedence(self): 

3152 self.prec = self.pdict.get("precedence") 

3153 

3154 # Validate and parse the precedence map 

3155 def validate_precedence(self): 

3156 preclist = [] 

3157 if self.prec: 

3158 if not isinstance(self.prec, (list, tuple)): 

3159 self.log.error("precedence must be a list or tuple") 

3160 self.error = True 

3161 return 

3162 for level, p in enumerate(self.prec): 

3163 if not isinstance(p, (list, tuple)): 

3164 self.log.error("Bad precedence table") 

3165 self.error = True 

3166 return 

3167 

3168 if len(p) < 2: 

3169 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)", p) 

3170 self.error = True 

3171 return 

3172 assoc = p[0] 

3173 if not isinstance(assoc, string_types): 

3174 self.log.error("precedence associativity must be a string") 

3175 self.error = True 

3176 return 

3177 for term in p[1:]: 

3178 if not isinstance(term, string_types): 

3179 self.log.error("precedence items must be strings") 

3180 self.error = True 

3181 return 

3182 preclist.append((term, assoc, level + 1)) 

3183 self.preclist = preclist 

3184 

3185 # Get all p_functions from the grammar 

3186 def get_pfunctions(self): 

3187 p_functions = [] 

3188 for name, item in self.pdict.items(): 

3189 if not name.startswith("p_") or name == "p_error": 

3190 continue 

3191 if isinstance(item, (types.FunctionType, types.MethodType)): 

3192 line = getattr(item, "co_firstlineno", item.__code__.co_firstlineno) 

3193 module = inspect.getmodule(item) 

3194 p_functions.append((line, module, name, item.__doc__)) 

3195 

3196 # Sort all of the actions by line number; make sure to stringify 

3197 # modules to make them sortable, since `line` may not uniquely sort all 

3198 # p functions 

3199 p_functions.sort( 

3200 key=lambda p_function: (p_function[0], str(p_function[1]), p_function[2], p_function[3]) 

3201 ) 

3202 self.pfuncs = p_functions 

3203 

3204 # Validate all of the p_functions 

3205 def validate_pfunctions(self): 

3206 grammar = [] 

3207 # Check for non-empty symbols 

3208 if len(self.pfuncs) == 0: 

3209 self.log.error("no rules of the form p_rulename are defined") 

3210 self.error = True 

3211 return 

3212 

3213 for line, module, name, doc in self.pfuncs: 

3214 file = inspect.getsourcefile(module) 

3215 func = self.pdict[name] 

3216 if isinstance(func, types.MethodType): 

3217 reqargs = 2 

3218 else: 

3219 reqargs = 1 

3220 if func.__code__.co_argcount > reqargs: 

3221 self.log.error("%s:%d: Rule %r has too many arguments", file, line, func.__name__) 

3222 self.error = True 

3223 elif func.__code__.co_argcount < reqargs: 

3224 self.log.error("%s:%d: Rule %r requires an argument", file, line, func.__name__) 

3225 self.error = True 

3226 elif not func.__doc__: 

3227 self.log.warning( 

3228 "%s:%d: No documentation string specified in function %r (ignored)", 

3229 file, 

3230 line, 

3231 func.__name__, 

3232 ) 

3233 else: 

3234 try: 

3235 parsed_g = parse_grammar(doc, file, line) 

3236 for g in parsed_g: 

3237 grammar.append((name, g)) 

3238 except SyntaxError as e: 

3239 self.log.error(str(e)) 

3240 self.error = True 

3241 

3242 # Looks like a valid grammar rule. 

3243 # Record the module in which it was defined. 

3244 self.modules.add(module) 

3245 

3246 # Secondary validation step that looks for p_ definitions that are not functions 

3247 # or functions that look like they might be grammar rules. 

3248 

3249 for n, v in self.pdict.items(): 

3250 if n.startswith("p_") and isinstance(v, (types.FunctionType, types.MethodType)): 

3251 continue 

3252 if n.startswith("t_"): 

3253 continue 

3254 if n.startswith("p_") and n != "p_error": 

3255 self.log.warning("%r not defined as a function", n) 

3256 if (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or ( 

3257 isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2 

3258 ): 

3259 if v.__doc__: 

3260 try: 

3261 doc = v.__doc__.split(" ") 

3262 if doc[1] == ":": 

3263 self.log.warning( 

3264 "%s:%d: Possible grammar rule %r defined without p_ prefix", 

3265 v.__code__.co_filename, 

3266 v.__code__.co_firstlineno, 

3267 n, 

3268 ) 

3269 except IndexError: 

3270 pass 

3271 

3272 self.grammar = grammar 

3273 

3274 

3275# ----------------------------------------------------------------------------- 

3276# yacc(module) 

3277# 

3278# Build a parser 

3279# ----------------------------------------------------------------------------- 

3280 

3281 

3282def yacc( 

3283 method="LALR", 

3284 debug=yaccdebug, 

3285 module=None, 

3286 tabmodule=tab_module, 

3287 start=None, 

3288 check_recursion=True, 

3289 optimize=False, 

3290 write_tables=True, 

3291 debugfile=debug_file, 

3292 outputdir=None, 

3293 debuglog=None, 

3294 errorlog=None, 

3295 picklefile=None, 

3296): 

3297 if tabmodule is None: 

3298 tabmodule = tab_module 

3299 

3300 # Reference to the parsing method of the last built parser 

3301 global parse 

3302 

3303 # If pickling is enabled, table files are not created 

3304 if picklefile: 

3305 write_tables = False 

3306 

3307 if errorlog is None: 

3308 errorlog = PlyLogger(sys.stderr) 

3309 

3310 # Get the module dictionary used for the parser 

3311 if module: 

3312 _items = [(k, getattr(module, k)) for k in dir(module)] 

3313 pdict = dict(_items) 

3314 # If no __file__ or __package__ attributes are available, try to obtain them 

3315 # from the __module__ instead 

3316 if "__file__" not in pdict: 

3317 pdict["__file__"] = sys.modules[pdict["__module__"]].__file__ 

3318 if "__package__" not in pdict and "__module__" in pdict: 

3319 if hasattr(sys.modules[pdict["__module__"]], "__package__"): 

3320 pdict["__package__"] = sys.modules[pdict["__module__"]].__package__ 

3321 else: 

3322 pdict = get_caller_module_dict(2) 

3323 

3324 if outputdir is None: 

3325 # If no output directory is set, the location of the output files 

3326 # is determined according to the following rules: 

3327 # - If tabmodule specifies a package, files go into that package directory 

3328 # - Otherwise, files go in the same directory as the specifying module 

3329 if isinstance(tabmodule, types.ModuleType): 

3330 srcfile = tabmodule.__file__ 

3331 else: 

3332 if "." not in tabmodule: 

3333 srcfile = pdict["__file__"] 

3334 else: 

3335 parts = tabmodule.split(".") 

3336 pkgname = ".".join(parts[:-1]) 

3337 exec("import %s" % pkgname) 

3338 srcfile = getattr(sys.modules[pkgname], "__file__", "") 

3339 outputdir = os.path.dirname(srcfile) 

3340 

3341 # Determine if the module providing the parser is part of a package. 

3342 # If so, fix the tabmodule setting so that tables load correctly 

3343 pkg = pdict.get("__package__") 

3344 if pkg and isinstance(tabmodule, str): 

3345 if "." not in tabmodule: 

3346 tabmodule = pkg + "." + tabmodule 

3347 

    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict["start"] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception as e:
                errorlog.warning("There was a problem loading the table file: %r", e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        pass
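    # Reaching this point means no usable cached tables were found: the table
    # module or pickle file was missing, was built by a different PLY version,
    # or its embedded signature no longer matches the grammar. The tables are
    # rebuilt from scratch below.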

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), "w"))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning("%s", e)
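    # pinfo.preclist is derived from a module-level "precedence" table. An
    # illustrative sketch of what a grammar module might declare (not code
    # from this file):
    #
    #     precedence = (
    #         ("left", "PLUS", "MINUS"),
    #         ("left", "TIMES", "DIVIDE"),
    #     )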

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error("%s", e)
            errors = True
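    # Each pinfo.grammar entry was harvested from a p_* function whose
    # docstring holds the BNF for one or more productions. A sketch of the
    # usual shape (hypothetical rule, not defined in this file):
    #
    #     def p_expression_plus(p):
    #         "expression : expression PLUS term"
    #         p[0] = p[1] + p[3]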

    # Set the grammar start symbol
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error(
            "%s:%d: Symbol %r used, but not defined as a token or a rule", prod.file, prod.line, sym
        )
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token %r defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n, p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol %r is unreachable", u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol %r", inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term)
        errors = True

    if errors:
        raise YaccError("Unable to build parser")

    # Generate the LR parsing tables from the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)
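    # method selects the table-construction algorithm: "LALR" (the default)
    # or "SLR". A caller wanting the simpler generator might, for example,
    # invoke yacc(method="SLR").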

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning(
                "shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution
            )

        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected, state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)
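    # Conflicts like those reported above typically come from ambiguity in
    # the grammar itself. A classic illustration (hypothetical rule, not from
    # this file) is
    #
    #     expression : expression PLUS expression
    #
    # which creates a shift/reduce conflict that PLY, like yacc, resolves in
    # favor of shifting unless precedence declarations say otherwise.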

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
            if tabmodule in sys.modules:
                del sys.modules[tabmodule]
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser
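
# Note: yacc() also rebinds the module-level "parse" shortcut (see the
# "global parse" above), so yacc.parse(data) works after a build. Holding the
# returned parser object is the safer style when several parsers coexist; a
# sketch:
#
#     parser = yacc.yacc()
#     result = parser.parse("some input")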