Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/ply/yacc.py: 5%

1929 statements  

coverage.py v6.5.0, created at 2022-10-07 02:47 -0700

1# ----------------------------------------------------------------------------- 

2# ply: yacc.py 

3# 

4# Copyright (C) 2001-2018 

5# David M. Beazley (Dabeaz LLC) 

6# All rights reserved. 

7# 

8# Redistribution and use in source and binary forms, with or without 

9# modification, are permitted provided that the following conditions are 

10# met: 

11# 

12# * Redistributions of source code must retain the above copyright notice, 

13# this list of conditions and the following disclaimer. 

14# * Redistributions in binary form must reproduce the above copyright notice, 

15# this list of conditions and the following disclaimer in the documentation 

16# and/or other materials provided with the distribution. 

17# * Neither the name of the David Beazley or Dabeaz LLC may be used to 

18# endorse or promote products derived from this software without 

19# specific prior written permission. 

20# 

21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

32# ----------------------------------------------------------------------------- 

33# 

34# This implements an LR parser that is constructed from grammar rules defined 

35# as Python functions. The grammar is specified by supplying the BNF inside 

36# Python documentation strings. The inspiration for this technique was borrowed 

37# from John Aycock's Spark parsing system. PLY might be viewed as a cross between 

38# Spark and the GNU bison utility. 

39# 

40# The current implementation is only somewhat object-oriented. The 

41# LR parser itself is defined in terms of an object (which allows multiple 

42# parsers to co-exist). However, most of the variables used during table 

43# construction are defined in terms of global variables. Users shouldn't 

44# notice unless they are trying to define multiple parsers at the same 

45# time using threads (in which case they should have their head examined). 

46# 

47# This implementation supports both SLR and LALR(1) parsing. LALR(1) 

48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 

49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 

50# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced 

51# by the more efficient DeRemer and Pennello algorithm. 

52# 

53# :::::::: WARNING ::::::: 

54# 

55# Construction of LR parsing tables is fairly complicated and expensive. 

56# To make this module run fast, a *LOT* of work has been put into 

57# optimization---often at the expense of readability and what one might 

58# consider to be good Python "coding style." Modify the code at your 

59# own risk! 

60# ---------------------------------------------------------------------------- 
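
For illustration, a minimal sketch of the technique described above: grammar rules supplied as Python functions with the BNF in their docstrings. This is not part of yacc.py; it assumes a standard `ply` installation and a hypothetical companion lexer module `calclex` defining the tokens NUMBER, PLUS, and TIMES.

import ply.yacc as yacc
from calclex import tokens   # hypothetical lexer module supplying the token list

def p_expr_plus(p):
    "expr : expr PLUS term"
    p[0] = p[1] + p[3]

def p_expr_term(p):
    "expr : term"
    p[0] = p[1]

def p_term_times(p):
    "term : term TIMES NUMBER"
    p[0] = p[1] * p[3]

def p_term_number(p):
    "term : NUMBER"
    p[0] = p[1]

def p_error(p):
    print("Syntax error at", p)

parser = yacc.yacc()               # builds the LR tables from the docstrings above
print(parser.parse("2 + 3 * 4"))   # -> 14, given a suitable calclex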

61 

62import inspect 

63import os.path 

64import re 

65import sys 

66import types 

67import warnings 

68 

69__version__ = "3.11" 

70__tabversion__ = "3.10" 

71 

72# ----------------------------------------------------------------------------- 

73# === User configurable parameters === 

74# 

75# Change these to modify the default behavior of yacc (if you wish) 

76# ----------------------------------------------------------------------------- 

77 

78yaccdebug = True # Debugging mode. If set, yacc generates a 

79# 'parser.out' file in the current directory 

80 

81debug_file = "parser.out" # Default name of the debugging file 

82tab_module = "parsetab" # Default name of the table module 

83default_lr = "LALR" # Default LR table generation method 

84 

85error_count = 3 # Number of symbols that must be shifted to leave recovery mode 

86 

87yaccdevel = False # Set to True if developing yacc. This turns off optimized 

88# implementations of certain functions. 

89 

90resultlimit = 40 # Size limit of results when running in debug mode. 

91 

92pickle_protocol = 0 # Protocol to use when writing pickle files 

93 
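
These defaults are usually overridden through the corresponding keyword arguments of the yacc() entry point rather than by editing this module; a brief sketch, using the standard PLY keyword names (assumed, not shown in this section):

import ply.yacc as yacc

# Suppress generation of the parser.out debugging file.
parser = yacc.yacc(debug=False)

# Or rename/redirect the generated artifacts and select SLR table construction.
parser = yacc.yacc(method="SLR", debugfile="grammar_debug.out",
                   tabmodule="exprtab", outputdir="build")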

94# String type-checking compatibility 

95if sys.version_info[0] < 3:  (coverage annotation: 95 ↛ 96, line 95 didn't jump to line 96 because the condition on line 95 was never true)

96 string_types = basestring 

97else: 

98 string_types = str 

99 

100MAXINT = sys.maxsize 

101 

102# This object is a stand-in for a logging object created by the 

103# logging module. PLY will use this by default to create things 

104# such as the parser.out file. If a user wants more detailed 

105# information, they can create their own logging object and pass 

106# it into PLY. 

107 

108 

109class PlyLogger(object): 

110 def __init__(self, f): 

111 self.f = f 

112 

113 def debug(self, msg, *args, **kwargs): 

114 self.f.write((msg % args) + "\n") 

115 

116 info = debug 

117 

118 def warning(self, msg, *args, **kwargs): 

119 self.f.write("WARNING: " + (msg % args) + "\n") 

120 

121 def error(self, msg, *args, **kwargs): 

122 self.f.write("ERROR: " + (msg % args) + "\n") 

123 

124 critical = debug 

125 

126 

127# Null logger is used when no output is generated. Does nothing. 

128class NullLogger(object): 

129 def __getattribute__(self, name): 

130 return self 

131 

132 def __call__(self, *args, **kwargs): 

133 return self 

134 

135 
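
A sketch of the alternative described above: passing a logger from the standard logging module so that table-construction diagnostics bypass PlyLogger entirely. The errorlog and debuglog keyword arguments of yacc() are assumed here.

import logging
import ply.yacc as yacc

logging.basicConfig(level=logging.DEBUG, filename="parser_build.log", filemode="w")
log = logging.getLogger("ply.yacc")

# Warnings, errors, and the grammar report are written through 'log'.
parser = yacc.yacc(debug=True, debuglog=log, errorlog=log)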

136# Exception raised for yacc-related errors 

137class YaccError(Exception): 

138 pass 

139 

140 

141# Format the result message that the parser produces when running in debug mode. 

142def format_result(r): 

143 repr_str = repr(r) 

144 if "\n" in repr_str: 

145 repr_str = repr(repr_str) 

146 if len(repr_str) > resultlimit: 

147 repr_str = repr_str[:resultlimit] + " ..." 

148 result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str) 

149 return result 

150 

151 

152# Format stack entries when the parser is running in debug mode 

153def format_stack_entry(r): 

154 repr_str = repr(r) 

155 if "\n" in repr_str: 

156 repr_str = repr(repr_str) 

157 if len(repr_str) < 16: 

158 return repr_str 

159 else: 

160 return "<%s @ 0x%x>" % (type(r).__name__, id(r)) 

161 

162 

163# Panic mode error recovery support. This feature is being reworked--much of the 

164# code here is to offer a deprecation/backwards compatible transition 

165 

166_errok = None 

167_token = None 

168_restart = None 

169_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error(). 

170Instead, invoke the methods on the associated parser instance: 

171 

172 def p_error(p): 

173 ... 

174 # Use parser.errok(), parser.token(), parser.restart() 

175 ... 

176 

177 parser = yacc.yacc() 

178""" 

179 

180 

181def errok(): 

182 warnings.warn(_warnmsg) 

183 return _errok() 

184 

185 

186def restart(): 

187 warnings.warn(_warnmsg) 

188 return _restart() 

189 

190 

191def token(): 

192 warnings.warn(_warnmsg) 

193 return _token() 

194 

195 

196# Utility function to call the p_error() function with some deprecation hacks 

197def call_errorfunc(errorfunc, token, parser): 

198 global _errok, _token, _restart 

199 _errok = parser.errok 

200 _token = parser.token 

201 _restart = parser.restart 

202 r = errorfunc(token) 

203 try: 

204 del _errok, _token, _restart 

205 except NameError: 

206 pass 

207 return r 

208 
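
A sketch of the per-instance style that the warning above recommends: panic-mode recovery inside p_error() using parser.token() and parser.errok(). The SEMI token name is an assumption about the user's grammar.

def p_error(p):
    if p is None:
        print("Unexpected end of input")
        return
    print("Syntax error at token", p.type)
    # Discard tokens until a statement separator, then resume parsing.
    while True:
        tok = parser.token()            # next token from the underlying lexer
        if not tok or tok.type == "SEMI":
            break
    parser.errok()                      # clear the error state
    return tok                          # becomes the new lookahead symbol

parser = yacc.yacc()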

209 

210# ----------------------------------------------------------------------------- 

211# === LR Parsing Engine === 

212# 

213# The following classes are used for the LR parser itself. These are not 

214# used during table construction and are independent of the actual LR 

215# table generation algorithm 

216# ----------------------------------------------------------------------------- 

217 

218# This class is used to hold non-terminal grammar symbols during parsing. 

219# It normally has the following attributes set: 

220# .type = Grammar symbol type 

221# .value = Symbol value 

222# .lineno = Starting line number 

223# .endlineno = Ending line number (optional, set automatically) 

224# .lexpos = Starting lex position 

225# .endlexpos = Ending lex position (optional, set automatically) 

226 

227 

228class YaccSymbol: 

229 def __str__(self): 

230 return self.type 

231 

232 def __repr__(self): 

233 return str(self) 

234 

235 

236# This class is a wrapper around the objects actually passed to each 

237# grammar rule. Index lookup and assignment actually assign the 

238# .value attribute of the underlying YaccSymbol object. 

239# The lineno() method returns the line number of a given 

240# item (or 0 if not defined). The linespan() method returns 

241# a tuple of (startline,endline) representing the range of lines 

242# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 

243# representing the range of positional information for a symbol. 

244 

245 

246class YaccProduction: 

247 def __init__(self, s, stack=None): 

248 self.slice = s 

249 self.stack = stack 

250 self.lexer = None 

251 self.parser = None 

252 

253 def __getitem__(self, n): 

254 if isinstance(n, slice): 

255 return [s.value for s in self.slice[n]] 

256 elif n >= 0: 

257 return self.slice[n].value 

258 else: 

259 return self.stack[n].value 

260 

261 def __setitem__(self, n, v): 

262 self.slice[n].value = v 

263 

264 def __getslice__(self, i, j): 

265 return [s.value for s in self.slice[i:j]] 

266 

267 def __len__(self): 

268 return len(self.slice) 

269 

270 def lineno(self, n): 

271 return getattr(self.slice[n], "lineno", 0) 

272 

273 def set_lineno(self, n, lineno): 

274 self.slice[n].lineno = lineno 

275 

276 def linespan(self, n): 

277 startline = getattr(self.slice[n], "lineno", 0) 

278 endline = getattr(self.slice[n], "endlineno", startline) 

279 return startline, endline 

280 

281 def lexpos(self, n): 

282 return getattr(self.slice[n], "lexpos", 0) 

283 

284 def set_lexpos(self, n, lexpos): 

285 self.slice[n].lexpos = lexpos 

286 

287 def lexspan(self, n): 

288 startpos = getattr(self.slice[n], "lexpos", 0) 

289 endpos = getattr(self.slice[n], "endlexpos", startpos) 

290 return startpos, endpos 

291 

292 def error(self): 

293 raise SyntaxError 

294 
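
A sketch of how these accessors are typically used inside a grammar rule; the rule and token names are illustrative, and endlineno/endlexpos are only filled in when parse() is called with tracking=True.

def p_assignment(p):
    "assignment : NAME EQUALS expr"
    # Indexing reads and writes the .value of the underlying YaccSymbol objects.
    p[0] = ("assign", p[1], p[3])
    first_line = p.lineno(1)              # line where NAME was seen (0 if unknown)
    start_line, end_line = p.linespan(3)  # line range covered by 'expr'
    start_pos, end_pos = p.lexspan(3)     # lexer position range covered by 'expr'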

295 

296# ----------------------------------------------------------------------------- 

297# == LRParser == 

298# 

299# The LR Parsing engine. 

300# ----------------------------------------------------------------------------- 

301 

302 

303class LRParser: 

304 def __init__(self, lrtab, errorf): 

305 self.productions = lrtab.lr_productions 

306 self.action = lrtab.lr_action 

307 self.goto = lrtab.lr_goto 

308 self.errorfunc = errorf 

309 self.set_defaulted_states() 

310 self.errorok = True 

311 

312 def errok(self): 

313 self.errorok = True 

314 

315 def restart(self): 

316 del self.statestack[:] 

317 del self.symstack[:] 

318 sym = YaccSymbol() 

319 sym.type = "$end" 

320 self.symstack.append(sym) 

321 self.statestack.append(0) 

322 

323 # Defaulted state support. 

324 # This method identifies parser states where there is only one possible reduction action. 

325 # For such states, the parser can choose to make a rule reduction without consuming 

326 # the next look-ahead token. This delayed invocation of the tokenizer can be useful in 

327 # certain kinds of advanced parsing situations where the lexer and parser interact with 

328 # each other or change states (i.e., manipulation of scope, lexer states, etc.). 

329 # 

330 # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions 

331 def set_defaulted_states(self): 

332 self.defaulted_states = {} 

333 for state, actions in self.action.items(): 

334 rules = list(actions.values()) 

335 if len(rules) == 1 and rules[0] < 0: 

336 self.defaulted_states[state] = rules[0] 

337 

338 def disable_defaulted_states(self): 

339 self.defaulted_states = {} 

340 
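
Conversely, when the lexer's behaviour depends on parser state (scope handling, lexer state switches, and so on), the optimization can be turned off so that every action is taken against a real lookahead token. A sketch, with the lexer object and input assumed:

parser = yacc.yacc()
parser.disable_defaulted_states()   # always fetch a lookahead before reducing
result = parser.parse(source_text, lexer=scope_aware_lexer)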

341 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

342 if debug or yaccdevel: 

343 if isinstance(debug, int): 

344 debug = PlyLogger(sys.stderr) 

345 return self.parsedebug(input, lexer, debug, tracking, tokenfunc) 

346 elif tracking: 

347 return self.parseopt(input, lexer, debug, tracking, tokenfunc) 

348 else: 

349 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) 

350 
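
A sketch of how these arguments select the engine variant at call time (data is an assumed input string):

result = parser.parse(data)                         # fastest path: parseopt_notrack()
result = parser.parse(data, tracking=True)          # keep line/position spans: parseopt()
result = parser.parse(data, debug=True)             # trace to stderr via PlyLogger: parsedebug()
result = parser.parse(data, debug=PlyLogger(open("trace.out", "w")))  # trace to a file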

351 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

352 # parsedebug(). 

353 # 

354 # This is the debugging enabled version of parse(). All changes made to the 

355 # parsing engine should be made here. Optimized versions of this function 

356 # are automatically created by the ply/ygen.py script. This script cuts out 

357 # sections enclosed in markers such as this: 

358 # 

359 # #--! DEBUG 

360 # statements 

361 # #--! DEBUG 

362 # 

363 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

364 

365 def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

366 # --! parsedebug-start 

367 lookahead = None # Current lookahead symbol 

368 lookaheadstack = [] # Stack of lookahead symbols 

369 actions = self.action # Local reference to action table (to avoid lookup on self.) 

370 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

371 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

372 defaulted_states = self.defaulted_states # Local reference to defaulted states 

373 pslice = YaccProduction(None) # Production object passed to grammar rules 

374 errorcount = 0 # Used during error recovery 

375 

376 # --! DEBUG 

377 debug.info("PLY: PARSE DEBUG START") 

378 # --! DEBUG 

379 

380 # If no lexer was given, we will try to use the lex module 

381 if not lexer: 

382 from . import lex 

383 

384 lexer = lex.lexer 

385 

386 # Set up the lexer and parser objects on pslice 

387 pslice.lexer = lexer 

388 pslice.parser = self 

389 

390 # If input was supplied, pass to lexer 

391 if input is not None: 

392 lexer.input(input) 

393 

394 if tokenfunc is None: 

395 # Tokenize function 

396 get_token = lexer.token 

397 else: 

398 get_token = tokenfunc 

399 

400 # Set the parser() token method (sometimes used in error recovery) 

401 self.token = get_token 

402 

403 # Set up the state and symbol stacks 

404 

405 statestack = [] # Stack of parsing states 

406 self.statestack = statestack 

407 symstack = [] # Stack of grammar symbols 

408 self.symstack = symstack 

409 

410 pslice.stack = symstack # Put in the production 

411 errtoken = None # Err token 

412 

413 # The start state is assumed to be (0,$end) 

414 

415 statestack.append(0) 

416 sym = YaccSymbol() 

417 sym.type = "$end" 

418 symstack.append(sym) 

419 state = 0 

420 while True: 

421 # Get the next symbol on the input. If a lookahead symbol 

422 # is already set, we just use that. Otherwise, we'll pull 

423 # the next token off of the lookaheadstack or from the lexer 

424 

425 # --! DEBUG 

426 debug.debug("") 

427 debug.debug("State : %s", state) 

428 # --! DEBUG 

429 

430 if state not in defaulted_states: 

431 if not lookahead: 

432 if not lookaheadstack: 

433 lookahead = get_token() # Get the next token 

434 else: 

435 lookahead = lookaheadstack.pop() 

436 if not lookahead: 

437 lookahead = YaccSymbol() 

438 lookahead.type = "$end" 

439 

440 # Check the action table 

441 ltype = lookahead.type 

442 t = actions[state].get(ltype) 

443 else: 

444 t = defaulted_states[state] 

445 # --! DEBUG 

446 debug.debug("Defaulted state %s: Reduce using %d", state, -t) 

447 # --! DEBUG 

448 

449 # --! DEBUG 

450 debug.debug( 

451 "Stack : %s", 

452 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(), 

453 ) 

454 # --! DEBUG 

455 

456 if t is not None: 

457 if t > 0: 

458 # shift a symbol on the stack 

459 statestack.append(t) 

460 state = t 

461 

462 # --! DEBUG 

463 debug.debug("Action : Shift and goto state %s", t) 

464 # --! DEBUG 

465 

466 symstack.append(lookahead) 

467 lookahead = None 

468 

469 # Decrease error count on successful shift 

470 if errorcount: 

471 errorcount -= 1 

472 continue 

473 

474 if t < 0: 

475 # reduce a symbol on the stack, emit a production 

476 p = prod[-t] 

477 pname = p.name 

478 plen = p.len 

479 

480 # Get production function 

481 sym = YaccSymbol() 

482 sym.type = pname # Production name 

483 sym.value = None 

484 

485 # --! DEBUG 

486 if plen: 

487 debug.info( 

488 "Action : Reduce rule [%s] with %s and goto state %d", 

489 p.str, 

490 "[" + ",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + "]", 

491 goto[statestack[-1 - plen]][pname], 

492 ) 

493 else: 

494 debug.info( 

495 "Action : Reduce rule [%s] with %s and goto state %d", 

496 p.str, 

497 [], 

498 goto[statestack[-1]][pname], 

499 ) 

500 

501 # --! DEBUG 

502 

503 if plen: 

504 targ = symstack[-plen - 1 :] 

505 targ[0] = sym 

506 

507 # --! TRACKING 

508 if tracking: 

509 t1 = targ[1] 

510 sym.lineno = t1.lineno 

511 sym.lexpos = t1.lexpos 

512 t1 = targ[-1] 

513 sym.endlineno = getattr(t1, "endlineno", t1.lineno) 

514 sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos) 

515 # --! TRACKING 

516 

517 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

518 # The code enclosed in this section is duplicated 

519 # below as a performance optimization. Make sure 

520 # changes get made in both locations. 

521 

522 pslice.slice = targ 

523 

524 try: 

525 # Call the grammar rule with our special slice object 

526 del symstack[-plen:] 

527 self.state = state 

528 p.callable(pslice) 

529 del statestack[-plen:] 

530 # --! DEBUG 

531 debug.info("Result : %s", format_result(pslice[0])) 

532 # --! DEBUG 

533 symstack.append(sym) 

534 state = goto[statestack[-1]][pname] 

535 statestack.append(state) 

536 except SyntaxError: 

537 # If an error was set, enter error recovery state 

538 lookaheadstack.append(lookahead) # Save the current lookahead token 

539 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

540 statestack.pop() # Pop back one state (before the reduce) 

541 state = statestack[-1] 

542 sym.type = "error" 

543 sym.value = "error" 

544 lookahead = sym 

545 errorcount = error_count 

546 self.errorok = False 

547 

548 continue 

549 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

550 

551 else: 

552 

553 # --! TRACKING 

554 if tracking: 

555 sym.lineno = lexer.lineno 

556 sym.lexpos = lexer.lexpos 

557 # --! TRACKING 

558 

559 targ = [sym] 

560 

561 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

562 # The code enclosed in this section is duplicated 

563 # above as a performance optimization. Make sure 

564 # changes get made in both locations. 

565 

566 pslice.slice = targ 

567 

568 try: 

569 # Call the grammar rule with our special slice object 

570 self.state = state 

571 p.callable(pslice) 

572 # --! DEBUG 

573 debug.info("Result : %s", format_result(pslice[0])) 

574 # --! DEBUG 

575 symstack.append(sym) 

576 state = goto[statestack[-1]][pname] 

577 statestack.append(state) 

578 except SyntaxError: 

579 # If an error was set, enter error recovery state 

580 lookaheadstack.append(lookahead) # Save the current lookahead token 

581 statestack.pop() # Pop back one state (before the reduce) 

582 state = statestack[-1] 

583 sym.type = "error" 

584 sym.value = "error" 

585 lookahead = sym 

586 errorcount = error_count 

587 self.errorok = False 

588 

589 continue 

590 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

591 

592 if t == 0: 

593 n = symstack[-1] 

594 result = getattr(n, "value", None) 

595 # --! DEBUG 

596 debug.info("Done : Returning %s", format_result(result)) 

597 debug.info("PLY: PARSE DEBUG END") 

598 # --! DEBUG 

599 return result 

600 

601 if t is None: 

602 

603 # --! DEBUG 

604 debug.error( 

605 "Error : %s", 

606 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(), 

607 ) 

608 # --! DEBUG 

609 

610 # We have some kind of parsing error here. To handle 

611 # this, we are going to push the current token onto 

612 # the tokenstack and replace it with an 'error' token. 

613 # If there are any synchronization rules, they may 

614 # catch it. 

615 # 

616 # In addition to pushing the error token, we call 

617 # the user defined p_error() function if this is the 

618 # first syntax error. This function is only called if 

619 # errorcount == 0. 

620 if errorcount == 0 or self.errorok: 

621 errorcount = error_count 

622 self.errorok = False 

623 errtoken = lookahead 

624 if errtoken.type == "$end": 

625 errtoken = None # End of file! 

626 if self.errorfunc: 

627 if errtoken and not hasattr(errtoken, "lexer"): 

628 errtoken.lexer = lexer 

629 self.state = state 

630 tok = call_errorfunc(self.errorfunc, errtoken, self) 

631 if self.errorok: 

632 # User must have done some kind of panic 

633 # mode recovery on their own. The 

634 # returned token is the next lookahead 

635 lookahead = tok 

636 errtoken = None 

637 continue 

638 else: 

639 if errtoken: 

640 if hasattr(errtoken, "lineno"): 

641 lineno = lookahead.lineno 

642 else: 

643 lineno = 0 

644 if lineno: 

645 sys.stderr.write( 

646 "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type) 

647 ) 

648 else: 

649 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type) 

650 else: 

651 sys.stderr.write("yacc: Parse error in input. EOF\n") 

652 return 

653 

654 else: 

655 errorcount = error_count 

656 

657 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

658 # entire parse has been rolled back and we're completely hosed. The token is 

659 # discarded and we just keep going. 

660 

661 if len(statestack) <= 1 and lookahead.type != "$end": 

662 lookahead = None 

663 errtoken = None 

664 state = 0 

665 # Nuke the pushback stack 

666 del lookaheadstack[:] 

667 continue 

668 

669 # case 2: the statestack has a couple of entries on it, but we're 

670 # at the end of the file. nuke the top entry and generate an error token 

671 

672 # Start nuking entries on the stack 

673 if lookahead.type == "$end": 

674 # Whoa. We're really hosed here. Bail out 

675 return 

676 

677 if lookahead.type != "error": 

678 sym = symstack[-1] 

679 if sym.type == "error": 

680 # Hmmm. Error is on top of stack, we'll just nuke input 

681 # symbol and continue 

682 # --! TRACKING 

683 if tracking: 

684 sym.endlineno = getattr(lookahead, "lineno", sym.lineno) 

685 sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos) 

686 # --! TRACKING 

687 lookahead = None 

688 continue 

689 

690 # Create the error symbol for the first time and make it the new lookahead symbol 

691 t = YaccSymbol() 

692 t.type = "error" 

693 

694 if hasattr(lookahead, "lineno"): 

695 t.lineno = t.endlineno = lookahead.lineno 

696 if hasattr(lookahead, "lexpos"): 

697 t.lexpos = t.endlexpos = lookahead.lexpos 

698 t.value = lookahead 

699 lookaheadstack.append(lookahead) 

700 lookahead = t 

701 else: 

702 sym = symstack.pop() 

703 # --! TRACKING 

704 if tracking: 

705 lookahead.lineno = sym.lineno 

706 lookahead.lexpos = sym.lexpos 

707 # --! TRACKING 

708 statestack.pop() 

709 state = statestack[-1] 

710 

711 continue 

712 

713 # Call an error function here 

714 raise RuntimeError("yacc: internal parser error!!!\n") 

715 

716 # --! parsedebug-end 

717 

718 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

719 # parseopt(). 

720 # 

721 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! 

722 # This code is automatically generated by the ply/ygen.py script. Make 

723 # changes to the parsedebug() method instead. 

724 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

725 

726 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

727 # --! parseopt-start 

728 lookahead = None # Current lookahead symbol 

729 lookaheadstack = [] # Stack of lookahead symbols 

730 actions = self.action # Local reference to action table (to avoid lookup on self.) 

731 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

732 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

733 defaulted_states = self.defaulted_states # Local reference to defaulted states 

734 pslice = YaccProduction(None) # Production object passed to grammar rules 

735 errorcount = 0 # Used during error recovery 

736 

737 # If no lexer was given, we will try to use the lex module 

738 if not lexer: 

739 from . import lex 

740 

741 lexer = lex.lexer 

742 

743 # Set up the lexer and parser objects on pslice 

744 pslice.lexer = lexer 

745 pslice.parser = self 

746 

747 # If input was supplied, pass to lexer 

748 if input is not None: 

749 lexer.input(input) 

750 

751 if tokenfunc is None: 

752 # Tokenize function 

753 get_token = lexer.token 

754 else: 

755 get_token = tokenfunc 

756 

757 # Set the parser() token method (sometimes used in error recovery) 

758 self.token = get_token 

759 

760 # Set up the state and symbol stacks 

761 

762 statestack = [] # Stack of parsing states 

763 self.statestack = statestack 

764 symstack = [] # Stack of grammar symbols 

765 self.symstack = symstack 

766 

767 pslice.stack = symstack # Put in the production 

768 errtoken = None # Err token 

769 

770 # The start state is assumed to be (0,$end) 

771 

772 statestack.append(0) 

773 sym = YaccSymbol() 

774 sym.type = "$end" 

775 symstack.append(sym) 

776 state = 0 

777 while True: 

778 # Get the next symbol on the input. If a lookahead symbol 

779 # is already set, we just use that. Otherwise, we'll pull 

780 # the next token off of the lookaheadstack or from the lexer 

781 

782 if state not in defaulted_states: 

783 if not lookahead: 

784 if not lookaheadstack: 

785 lookahead = get_token() # Get the next token 

786 else: 

787 lookahead = lookaheadstack.pop() 

788 if not lookahead: 

789 lookahead = YaccSymbol() 

790 lookahead.type = "$end" 

791 

792 # Check the action table 

793 ltype = lookahead.type 

794 t = actions[state].get(ltype) 

795 else: 

796 t = defaulted_states[state] 

797 

798 if t is not None: 

799 if t > 0: 

800 # shift a symbol on the stack 

801 statestack.append(t) 

802 state = t 

803 

804 symstack.append(lookahead) 

805 lookahead = None 

806 

807 # Decrease error count on successful shift 

808 if errorcount: 

809 errorcount -= 1 

810 continue 

811 

812 if t < 0: 

813 # reduce a symbol on the stack, emit a production 

814 p = prod[-t] 

815 pname = p.name 

816 plen = p.len 

817 

818 # Get production function 

819 sym = YaccSymbol() 

820 sym.type = pname # Production name 

821 sym.value = None 

822 

823 if plen: 

824 targ = symstack[-plen - 1 :] 

825 targ[0] = sym 

826 

827 # --! TRACKING 

828 if tracking: 

829 t1 = targ[1] 

830 sym.lineno = t1.lineno 

831 sym.lexpos = t1.lexpos 

832 t1 = targ[-1] 

833 sym.endlineno = getattr(t1, "endlineno", t1.lineno) 

834 sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos) 

835 # --! TRACKING 

836 

837 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

838 # The code enclosed in this section is duplicated 

839 # below as a performance optimization. Make sure 

840 # changes get made in both locations. 

841 

842 pslice.slice = targ 

843 

844 try: 

845 # Call the grammar rule with our special slice object 

846 del symstack[-plen:] 

847 self.state = state 

848 p.callable(pslice) 

849 del statestack[-plen:] 

850 symstack.append(sym) 

851 state = goto[statestack[-1]][pname] 

852 statestack.append(state) 

853 except SyntaxError: 

854 # If an error was set, enter error recovery state 

855 lookaheadstack.append(lookahead) # Save the current lookahead token 

856 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

857 statestack.pop() # Pop back one state (before the reduce) 

858 state = statestack[-1] 

859 sym.type = "error" 

860 sym.value = "error" 

861 lookahead = sym 

862 errorcount = error_count 

863 self.errorok = False 

864 

865 continue 

866 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

867 

868 else: 

869 

870 # --! TRACKING 

871 if tracking: 

872 sym.lineno = lexer.lineno 

873 sym.lexpos = lexer.lexpos 

874 # --! TRACKING 

875 

876 targ = [sym] 

877 

878 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

879 # The code enclosed in this section is duplicated 

880 # above as a performance optimization. Make sure 

881 # changes get made in both locations. 

882 

883 pslice.slice = targ 

884 

885 try: 

886 # Call the grammar rule with our special slice object 

887 self.state = state 

888 p.callable(pslice) 

889 symstack.append(sym) 

890 state = goto[statestack[-1]][pname] 

891 statestack.append(state) 

892 except SyntaxError: 

893 # If an error was set, enter error recovery state 

894 lookaheadstack.append(lookahead) # Save the current lookahead token 

895 statestack.pop() # Pop back one state (before the reduce) 

896 state = statestack[-1] 

897 sym.type = "error" 

898 sym.value = "error" 

899 lookahead = sym 

900 errorcount = error_count 

901 self.errorok = False 

902 

903 continue 

904 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

905 

906 if t == 0: 

907 n = symstack[-1] 

908 result = getattr(n, "value", None) 

909 return result 

910 

911 if t is None: 

912 

913 # We have some kind of parsing error here. To handle 

914 # this, we are going to push the current token onto 

915 # the tokenstack and replace it with an 'error' token. 

916 # If there are any synchronization rules, they may 

917 # catch it. 

918 # 

919 # In addition to pushing the error token, we call 

920 # the user defined p_error() function if this is the 

921 # first syntax error. This function is only called if 

922 # errorcount == 0. 

923 if errorcount == 0 or self.errorok: 

924 errorcount = error_count 

925 self.errorok = False 

926 errtoken = lookahead 

927 if errtoken.type == "$end": 

928 errtoken = None # End of file! 

929 if self.errorfunc: 

930 if errtoken and not hasattr(errtoken, "lexer"): 

931 errtoken.lexer = lexer 

932 self.state = state 

933 tok = call_errorfunc(self.errorfunc, errtoken, self) 

934 if self.errorok: 

935 # User must have done some kind of panic 

936 # mode recovery on their own. The 

937 # returned token is the next lookahead 

938 lookahead = tok 

939 errtoken = None 

940 continue 

941 else: 

942 if errtoken: 

943 if hasattr(errtoken, "lineno"): 

944 lineno = lookahead.lineno 

945 else: 

946 lineno = 0 

947 if lineno: 

948 sys.stderr.write( 

949 "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type) 

950 ) 

951 else: 

952 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type) 

953 else: 

954 sys.stderr.write("yacc: Parse error in input. EOF\n") 

955 return 

956 

957 else: 

958 errorcount = error_count 

959 

960 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

961 # entire parse has been rolled back and we're completely hosed. The token is 

962 # discarded and we just keep going. 

963 

964 if len(statestack) <= 1 and lookahead.type != "$end": 

965 lookahead = None 

966 errtoken = None 

967 state = 0 

968 # Nuke the pushback stack 

969 del lookaheadstack[:] 

970 continue 

971 

972 # case 2: the statestack has a couple of entries on it, but we're 

973 # at the end of the file. nuke the top entry and generate an error token 

974 

975 # Start nuking entries on the stack 

976 if lookahead.type == "$end": 

977 # Whoa. We're really hosed here. Bail out 

978 return 

979 

980 if lookahead.type != "error": 

981 sym = symstack[-1] 

982 if sym.type == "error": 

983 # Hmmm. Error is on top of stack, we'll just nuke input 

984 # symbol and continue 

985 # --! TRACKING 

986 if tracking: 

987 sym.endlineno = getattr(lookahead, "lineno", sym.lineno) 

988 sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos) 

989 # --! TRACKING 

990 lookahead = None 

991 continue 

992 

993 # Create the error symbol for the first time and make it the new lookahead symbol 

994 t = YaccSymbol() 

995 t.type = "error" 

996 

997 if hasattr(lookahead, "lineno"): 

998 t.lineno = t.endlineno = lookahead.lineno 

999 if hasattr(lookahead, "lexpos"): 

1000 t.lexpos = t.endlexpos = lookahead.lexpos 

1001 t.value = lookahead 

1002 lookaheadstack.append(lookahead) 

1003 lookahead = t 

1004 else: 

1005 sym = symstack.pop() 

1006 # --! TRACKING 

1007 if tracking: 

1008 lookahead.lineno = sym.lineno 

1009 lookahead.lexpos = sym.lexpos 

1010 # --! TRACKING 

1011 statestack.pop() 

1012 state = statestack[-1] 

1013 

1014 continue 

1015 

1016 # Call an error function here 

1017 raise RuntimeError("yacc: internal parser error!!!\n") 

1018 

1019 # --! parseopt-end 

1020 

1021 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1022 # parseopt_notrack(). 

1023 # 

1024 # Optimized version of parseopt() with line number tracking removed. 

1025 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated 

1026 # by the ply/ygen.py script. Make changes to the parsedebug() method instead. 

1027 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1028 

1029 def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

1030 # --! parseopt-notrack-start 

1031 lookahead = None # Current lookahead symbol 

1032 lookaheadstack = [] # Stack of lookahead symbols 

1033 actions = self.action # Local reference to action table (to avoid lookup on self.) 

1034 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

1035 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

1036 defaulted_states = self.defaulted_states # Local reference to defaulted states 

1037 pslice = YaccProduction(None) # Production object passed to grammar rules 

1038 errorcount = 0 # Used during error recovery 

1039 

1040 # If no lexer was given, we will try to use the lex module 

1041 if not lexer: 

1042 from . import lex 

1043 

1044 lexer = lex.lexer 

1045 

1046 # Set up the lexer and parser objects on pslice 

1047 pslice.lexer = lexer 

1048 pslice.parser = self 

1049 

1050 # If input was supplied, pass to lexer 

1051 if input is not None: 

1052 lexer.input(input) 

1053 

1054 if tokenfunc is None: 

1055 # Tokenize function 

1056 get_token = lexer.token 

1057 else: 

1058 get_token = tokenfunc 

1059 

1060 # Set the parser() token method (sometimes used in error recovery) 

1061 self.token = get_token 

1062 

1063 # Set up the state and symbol stacks 

1064 

1065 statestack = [] # Stack of parsing states 

1066 self.statestack = statestack 

1067 symstack = [] # Stack of grammar symbols 

1068 self.symstack = symstack 

1069 

1070 pslice.stack = symstack # Put in the production 

1071 errtoken = None # Err token 

1072 

1073 # The start state is assumed to be (0,$end) 

1074 

1075 statestack.append(0) 

1076 sym = YaccSymbol() 

1077 sym.type = "$end" 

1078 symstack.append(sym) 

1079 state = 0 

1080 while True: 

1081 # Get the next symbol on the input. If a lookahead symbol 

1082 # is already set, we just use that. Otherwise, we'll pull 

1083 # the next token off of the lookaheadstack or from the lexer 

1084 

1085 if state not in defaulted_states: 

1086 if not lookahead: 

1087 if not lookaheadstack: 

1088 lookahead = get_token() # Get the next token 

1089 else: 

1090 lookahead = lookaheadstack.pop() 

1091 if not lookahead: 

1092 lookahead = YaccSymbol() 

1093 lookahead.type = "$end" 

1094 

1095 # Check the action table 

1096 ltype = lookahead.type 

1097 t = actions[state].get(ltype) 

1098 else: 

1099 t = defaulted_states[state] 

1100 

1101 if t is not None: 

1102 if t > 0: 

1103 # shift a symbol on the stack 

1104 statestack.append(t) 

1105 state = t 

1106 

1107 symstack.append(lookahead) 

1108 lookahead = None 

1109 

1110 # Decrease error count on successful shift 

1111 if errorcount: 

1112 errorcount -= 1 

1113 continue 

1114 

1115 if t < 0: 

1116 # reduce a symbol on the stack, emit a production 

1117 p = prod[-t] 

1118 pname = p.name 

1119 plen = p.len 

1120 

1121 # Get production function 

1122 sym = YaccSymbol() 

1123 sym.type = pname # Production name 

1124 sym.value = None 

1125 

1126 if plen: 

1127 targ = symstack[-plen - 1 :] 

1128 targ[0] = sym 

1129 

1130 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1131 # The code enclosed in this section is duplicated 

1132 # below as a performance optimization. Make sure 

1133 # changes get made in both locations. 

1134 

1135 pslice.slice = targ 

1136 

1137 try: 

1138 # Call the grammar rule with our special slice object 

1139 del symstack[-plen:] 

1140 self.state = state 

1141 p.callable(pslice) 

1142 del statestack[-plen:] 

1143 symstack.append(sym) 

1144 state = goto[statestack[-1]][pname] 

1145 statestack.append(state) 

1146 except SyntaxError: 

1147 # If an error was set, enter error recovery state 

1148 lookaheadstack.append(lookahead) # Save the current lookahead token 

1149 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

1150 statestack.pop() # Pop back one state (before the reduce) 

1151 state = statestack[-1] 

1152 sym.type = "error" 

1153 sym.value = "error" 

1154 lookahead = sym 

1155 errorcount = error_count 

1156 self.errorok = False 

1157 

1158 continue 

1159 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1160 

1161 else: 

1162 

1163 targ = [sym] 

1164 

1165 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1166 # The code enclosed in this section is duplicated 

1167 # above as a performance optimization. Make sure 

1168 # changes get made in both locations. 

1169 

1170 pslice.slice = targ 

1171 

1172 try: 

1173 # Call the grammar rule with our special slice object 

1174 self.state = state 

1175 p.callable(pslice) 

1176 symstack.append(sym) 

1177 state = goto[statestack[-1]][pname] 

1178 statestack.append(state) 

1179 except SyntaxError: 

1180 # If an error was set, enter error recovery state 

1181 lookaheadstack.append(lookahead) # Save the current lookahead token 

1182 statestack.pop() # Pop back one state (before the reduce) 

1183 state = statestack[-1] 

1184 sym.type = "error" 

1185 sym.value = "error" 

1186 lookahead = sym 

1187 errorcount = error_count 

1188 self.errorok = False 

1189 

1190 continue 

1191 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1192 

1193 if t == 0: 

1194 n = symstack[-1] 

1195 result = getattr(n, "value", None) 

1196 return result 

1197 

1198 if t is None: 

1199 

1200 # We have some kind of parsing error here. To handle 

1201 # this, we are going to push the current token onto 

1202 # the tokenstack and replace it with an 'error' token. 

1203 # If there are any synchronization rules, they may 

1204 # catch it. 

1205 # 

1206 # In addition to pushing the error token, we call 

1207 # the user defined p_error() function if this is the 

1208 # first syntax error. This function is only called if 

1209 # errorcount == 0. 

1210 if errorcount == 0 or self.errorok: 

1211 errorcount = error_count 

1212 self.errorok = False 

1213 errtoken = lookahead 

1214 if errtoken.type == "$end": 

1215 errtoken = None # End of file! 

1216 if self.errorfunc: 

1217 if errtoken and not hasattr(errtoken, "lexer"): 

1218 errtoken.lexer = lexer 

1219 self.state = state 

1220 tok = call_errorfunc(self.errorfunc, errtoken, self) 

1221 if self.errorok: 

1222 # User must have done some kind of panic 

1223 # mode recovery on their own. The 

1224 # returned token is the next lookahead 

1225 lookahead = tok 

1226 errtoken = None 

1227 continue 

1228 else: 

1229 if errtoken: 

1230 if hasattr(errtoken, "lineno"): 

1231 lineno = lookahead.lineno 

1232 else: 

1233 lineno = 0 

1234 if lineno: 

1235 sys.stderr.write( 

1236 "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type) 

1237 ) 

1238 else: 

1239 sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type) 

1240 else: 

1241 sys.stderr.write("yacc: Parse error in input. EOF\n") 

1242 return 

1243 

1244 else: 

1245 errorcount = error_count 

1246 

1247 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

1248 # entire parse has been rolled back and we're completely hosed. The token is 

1249 # discarded and we just keep going. 

1250 

1251 if len(statestack) <= 1 and lookahead.type != "$end": 

1252 lookahead = None 

1253 errtoken = None 

1254 state = 0 

1255 # Nuke the pushback stack 

1256 del lookaheadstack[:] 

1257 continue 

1258 

1259 # case 2: the statestack has a couple of entries on it, but we're 

1260 # at the end of the file. nuke the top entry and generate an error token 

1261 

1262 # Start nuking entries on the stack 

1263 if lookahead.type == "$end": 

1264 # Whoa. We're really hosed here. Bail out 

1265 return 

1266 

1267 if lookahead.type != "error": 

1268 sym = symstack[-1] 

1269 if sym.type == "error": 

1270 # Hmmm. Error is on top of stack, we'll just nuke input 

1271 # symbol and continue 

1272 lookahead = None 

1273 continue 

1274 

1275 # Create the error symbol for the first time and make it the new lookahead symbol 

1276 t = YaccSymbol() 

1277 t.type = "error" 

1278 

1279 if hasattr(lookahead, "lineno"): 

1280 t.lineno = t.endlineno = lookahead.lineno 

1281 if hasattr(lookahead, "lexpos"): 

1282 t.lexpos = t.endlexpos = lookahead.lexpos 

1283 t.value = lookahead 

1284 lookaheadstack.append(lookahead) 

1285 lookahead = t 

1286 else: 

1287 sym = symstack.pop() 

1288 statestack.pop() 

1289 state = statestack[-1] 

1290 

1291 continue 

1292 

1293 # Call an error function here 

1294 raise RuntimeError("yacc: internal parser error!!!\n") 

1295 

1296 # --! parseopt-notrack-end 

1297 

1298 

1299# ----------------------------------------------------------------------------- 

1300# === Grammar Representation === 

1301# 

1302# The following functions, classes, and variables are used to represent and 

1303# manipulate the rules that make up a grammar. 

1304# ----------------------------------------------------------------------------- 

1305 

1306# regex matching identifiers 

1307_is_identifier = re.compile(r"^[a-zA-Z0-9_-]+$") 

1308 

1309# ----------------------------------------------------------------------------- 

1310# class Production: 

1311# 

1312# This class stores the raw information about a single production or grammar rule. 

1313# A grammar rule refers to a specification such as this: 

1314# 

1315# expr : expr PLUS term 

1316# 

1317# Here are the basic attributes defined on all productions 

1318# 

1319# name - Name of the production. For example 'expr' 

1320# prod - A list of symbols on the right side ['expr','PLUS','term'] 

1321# prec - Production precedence level 

1322# number - Production number. 

1323# func - Function that executes on reduce 

1324# file - File where production function is defined 

1325# lineno - Line number where production function is defined 

1326# 

1327# The following attributes are defined or optional. 

1328# 

1329# len - Length of the production (number of symbols on right hand side) 

1330# usyms - Set of unique symbols found in the production 

1331# ----------------------------------------------------------------------------- 

1332 

1333 

1334class Production(object): 

1335 reduced = 0 

1336 

1337 def __init__(self, number, name, prod, precedence=("right", 0), func=None, file="", line=0): 

1338 self.name = name 

1339 self.prod = tuple(prod) 

1340 self.number = number 

1341 self.func = func 

1342 self.callable = None 

1343 self.file = file 

1344 self.line = line 

1345 self.prec = precedence 

1346 

1347 # Internal settings used during table construction 

1348 

1349 self.len = len(self.prod) # Length of the production 

1350 

1351 # Create a list of unique production symbols used in the production 

1352 self.usyms = [] 

1353 for s in self.prod: 

1354 if s not in self.usyms: 

1355 self.usyms.append(s) 

1356 

1357 # List of all LR items for the production 

1358 self.lr_items = [] 

1359 self.lr_next = None 

1360 

1361 # Create a string representation 

1362 if self.prod: 

1363 self.str = "%s -> %s" % (self.name, " ".join(self.prod)) 

1364 else: 

1365 self.str = "%s -> <empty>" % self.name 

1366 

1367 def __str__(self): 

1368 return self.str 

1369 

1370 def __repr__(self): 

1371 return "Production(" + str(self) + ")" 

1372 

1373 def __len__(self): 

1374 return len(self.prod) 

1375 

1376 def __nonzero__(self): 

1377 return 1 

1378 

1379 def __getitem__(self, index): 

1380 return self.prod[index] 

1381 

1382 # Return the nth lr_item from the production (or None if at the end) 

1383 def lr_item(self, n): 

1384 if n > len(self.prod): 

1385 return None 

1386 p = LRItem(self, n) 

1387 # Precompute the list of productions immediately following. 

1388 try: 

1389 p.lr_after = self.Prodnames[p.prod[n + 1]] 

1390 except (IndexError, KeyError): 

1391 p.lr_after = [] 

1392 try: 

1393 p.lr_before = p.prod[n - 1] 

1394 except IndexError: 

1395 p.lr_before = None 

1396 return p 

1397 

1398 # Bind the production function name to a callable 

1399 def bind(self, pdict): 

1400 if self.func: 

1401 self.callable = pdict[self.func] 

1402 

1403 

1404# This class serves as a minimal stand-in for Production objects when 

1405# reading table data from files. It only contains information 

1406# actually used by the LR parsing engine, plus some additional 

1407# debugging information. 

1408class MiniProduction(object): 

1409 def __init__(self, str, name, len, func, file, line): 

1410 self.name = name 

1411 self.len = len 

1412 self.func = func 

1413 self.callable = None 

1414 self.file = file 

1415 self.line = line 

1416 self.str = str 

1417 

1418 def __str__(self): 

1419 return self.str 

1420 

1421 def __repr__(self): 

1422 return "MiniProduction(%s)" % self.str 

1423 

1424 # Bind the production function name to a callable 

1425 def bind(self, pdict): 

1426 if self.func: 

1427 self.callable = pdict[self.func] 

1428 

1429 
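
MiniProduction records are what the engine works with when previously written tables are reloaded instead of regenerated. A sketch of the standard yacc() options that control where those tables live (keyword names assumed from the usual PLY front end):

# First run: construct the tables and write them to build/my_parsetab.py.
parser = yacc.yacc(tabmodule="my_parsetab", outputdir="build", write_tables=True)

# Later runs with an unchanged grammar reload the saved tables, so the
# parsing engine only ever sees MiniProduction objects.
parser = yacc.yacc(tabmodule="my_parsetab", outputdir="build")

# Alternatively, persist the tables with pickle (written using pickle_protocol above).
parser = yacc.yacc(picklefile="parser.pickle")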

1430# ----------------------------------------------------------------------------- 

1431# class LRItem 

1432# 

1433# This class represents a specific stage of parsing a production rule. For 

1434# example: 

1435# 

1436# expr : expr . PLUS term 

1437# 

1438# In the above, the "." represents the current location of the parse. Here 

1439# are the basic attributes: 

1440# 

1441# name - Name of the production. For example 'expr' 

1442# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 

1443# number - Production number. 

1444# 

1445# lr_next - Next LR item. Example: if we are at 'expr -> expr . PLUS term' 

1446# then lr_next refers to 'expr -> expr PLUS . term' 

1447# lr_index - LR item index (location of the ".") in the prod list. 

1448# lookaheads - LALR lookahead symbols for this item 

1449# len - Length of the production (number of symbols on right hand side) 

1450# lr_after - List of all productions that immediately follow 

1451# lr_before - Grammar symbol immediately before 

1452# ----------------------------------------------------------------------------- 

1453 

1454 

1455class LRItem(object): 

1456 def __init__(self, p, n): 

1457 self.name = p.name 

1458 self.prod = list(p.prod) 

1459 self.number = p.number 

1460 self.lr_index = n 

1461 self.lookaheads = {} 

1462 self.prod.insert(n, ".") 

1463 self.prod = tuple(self.prod) 

1464 self.len = len(self.prod) 

1465 self.usyms = p.usyms 

1466 

1467 def __str__(self): 

1468 if self.prod: 

1469 s = "%s -> %s" % (self.name, " ".join(self.prod)) 

1470 else: 

1471 s = "%s -> <empty>" % self.name 

1472 return s 

1473 

1474 def __repr__(self): 

1475 return "LRItem(" + str(self) + ")" 

1476 

1477 

1478# ----------------------------------------------------------------------------- 

1479# rightmost_terminal() 

1480# 

1481# Return the rightmost terminal from a list of symbols. Used in add_production() 

1482# ----------------------------------------------------------------------------- 

1483def rightmost_terminal(symbols, terminals): 

1484 i = len(symbols) - 1 

1485 while i >= 0: 

1486 if symbols[i] in terminals: 

1487 return symbols[i] 

1488 i -= 1 

1489 return None 

1490 

1491 

1492# ----------------------------------------------------------------------------- 

1493# === GRAMMAR CLASS === 

1494# 

1495# The following class represents the contents of the specified grammar along 

1496# with various computed properties such as first sets, follow sets, LR items, etc. 

1497# This data is used for critical parts of the table generation process later. 

1498# ----------------------------------------------------------------------------- 

1499 

1500 

1501class GrammarError(YaccError): 

1502 pass 

1503 

1504 

1505class Grammar(object): 

1506 def __init__(self, terminals): 

1507 self.Productions = [None] # A list of all of the productions. The first 

1508 # entry is always reserved for the purpose of 

1509 # building an augmented grammar 

1510 

1511 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 

1512 # productions of that nonterminal. 

1513 

1514 self.Prodmap = {} # A dictionary that is only used to detect duplicate 

1515 # productions. 

1516 

1517 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 

1518 # list of the rules where they are used. 

1519 

1520 for term in terminals: 

1521 self.Terminals[term] = [] 

1522 

1523 self.Terminals["error"] = [] 

1524 

1525 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 

1526 # of rule numbers where they are used. 

1527 

1528 self.First = {} # A dictionary of precomputed FIRST(x) symbols 

1529 

1530 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 

1531 

1532 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 

1533 # form ('right',level) or ('nonassoc', level) or ('left',level) 

1534 

1535 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar. 

1536 # This is only used to provide error checking and to generate 

1537 # a warning about unused precedence rules. 

1538 

1539 self.Start = None # Starting symbol for the grammar 

1540 

1541 def __len__(self): 

1542 return len(self.Productions) 

1543 

1544 def __getitem__(self, index): 

1545 return self.Productions[index] 

1546 

1547 # ----------------------------------------------------------------------------- 

1548 # set_precedence() 

1549 # 

1550 # Sets the precedence for a given terminal. assoc is the associativity such as 

1551 # 'left','right', or 'nonassoc'. level is a numeric level. 

1552 # 

1553 # ----------------------------------------------------------------------------- 

1554 

1555 def set_precedence(self, term, assoc, level): 

1556 assert self.Productions == [None], "Must call set_precedence() before add_production()" 

1557 if term in self.Precedence: 

1558 raise GrammarError("Precedence already specified for terminal %r" % term) 

1559 if assoc not in ["left", "right", "nonassoc"]: 

1560 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 

1561 self.Precedence[term] = (assoc, level) 

1562 
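
A sketch of how this method is driven. In normal use the user-level precedence declaration is translated into one call per terminal, with higher levels binding more tightly (terminal names are illustrative):

g = Grammar(["NUMBER", "PLUS", "TIMES"])
g.set_precedence("PLUS", "left", 1)
g.set_precedence("TIMES", "left", 2)    # binds more tightly than PLUS

# Roughly equivalent to the familiar user-level declaration:
# precedence = (("left", "PLUS"), ("left", "TIMES"))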

1563 # ----------------------------------------------------------------------------- 

1564 # add_production() 

1565 # 

1566 # Given an action function, this function assembles a production rule and 

1567 # computes its precedence level. 

1568 # 

1569 # The production rule is supplied as a list of symbols. For example, 

1570 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 

1571 # symbols ['expr','PLUS','term']. 

1572 # 

1573 # Precedence is determined by the precedence of the right-most terminal symbol 

1574 # or the precedence of a terminal specified by %prec. 

1575 # 

1576 # A variety of error checks are performed to make sure production symbols 

1577 # are valid and that %prec is used correctly. 

1578 # ----------------------------------------------------------------------------- 

1579 

1580 def add_production(self, prodname, syms, func=None, file="", line=0): 

1581 

1582 if prodname in self.Terminals: 

1583 raise GrammarError( 

1584 "%s:%d: Illegal rule name %r. Already defined as a token" % (file, line, prodname) 

1585 ) 

1586 if prodname == "error": 

1587 raise GrammarError( 

1588 "%s:%d: Illegal rule name %r. error is a reserved word" % (file, line, prodname) 

1589 ) 

1590 if not _is_identifier.match(prodname): 

1591 raise GrammarError("%s:%d: Illegal rule name %r" % (file, line, prodname)) 

1592 

1593 # Look for literal tokens 

1594 for n, s in enumerate(syms): 

1595 if s[0] in "'\"": 

1596 try: 

1597 c = eval(s) 

1598 if len(c) > 1: 

1599 raise GrammarError( 

1600 "%s:%d: Literal token %s in rule %r may only be a single character" 

1601 % (file, line, s, prodname) 

1602 ) 

1603 if c not in self.Terminals: 

1604 self.Terminals[c] = [] 

1605 syms[n] = c 

1606 continue 

1607 except SyntaxError: 

1608 pass 

1609 if not _is_identifier.match(s) and s != "%prec": 

1610 raise GrammarError("%s:%d: Illegal name %r in rule %r" % (file, line, s, prodname)) 

1611 

1612 # Determine the precedence level 

1613 if "%prec" in syms: 

1614 if syms[-1] == "%prec": 

1615 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file, line)) 

1616 if syms[-2] != "%prec": 

1617 raise GrammarError( 

1618 "%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file, line) 

1619 ) 

1620 precname = syms[-1] 

1621 prodprec = self.Precedence.get(precname) 

1622 if not prodprec: 

1623 raise GrammarError("%s:%d: Nothing known about the precedence of %r" % (file, line, precname)) 

1624 else: 

1625 self.UsedPrecedence.add(precname) 

1626 del syms[-2:] # Drop %prec from the rule 

1627 else: 

1628 # If no %prec, precedence is determined by the rightmost terminal symbol 

1629 precname = rightmost_terminal(syms, self.Terminals) 

1630 prodprec = self.Precedence.get(precname, ("right", 0)) 

1631 

1632 # See if the rule is already in the rulemap 

1633 map = "%s -> %s" % (prodname, syms) 

1634 if map in self.Prodmap: 

1635 m = self.Prodmap[map] 

1636 raise GrammarError( 

1637 "%s:%d: Duplicate rule %s. " % (file, line, m) 

1638 + "Previous definition at %s:%d" % (m.file, m.line) 

1639 ) 

1640 

1641 # From this point on, everything is valid. Create a new Production instance 

1642 pnumber = len(self.Productions) 

1643 if prodname not in self.Nonterminals: 

1644 self.Nonterminals[prodname] = [] 

1645 

1646 # Add the production number to Terminals and Nonterminals 

1647 for t in syms: 

1648 if t in self.Terminals: 

1649 self.Terminals[t].append(pnumber) 

1650 else: 

1651 if t not in self.Nonterminals: 

1652 self.Nonterminals[t] = [] 

1653 self.Nonterminals[t].append(pnumber) 

1654 

1655 # Create a production and add it to the list of productions 

1656 p = Production(pnumber, prodname, syms, prodprec, func, file, line) 

1657 self.Productions.append(p) 

1658 self.Prodmap[map] = p 

1659 

1660 # Add to the global productions list 

1661 try: 

1662 self.Prodnames[prodname].append(p) 

1663 except KeyError: 

1664 self.Prodnames[prodname] = [p] 

1665 
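
A sketch of the two ways a rule picks up its precedence level under the checks above: implicitly from its rightmost terminal, or explicitly via %prec (UMINUS is an illustrative pseudo-token that would have its own precedence entry):

def p_expr_minus(p):
    "expr : expr MINUS expr"            # precedence taken from the rightmost terminal, MINUS
    p[0] = p[1] - p[3]

def p_expr_uminus(p):
    "expr : MINUS expr %prec UMINUS"    # precedence overridden to that of UMINUS
    p[0] = -p[2]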

1666 # ----------------------------------------------------------------------------- 

1667 # set_start() 

1668 # 

1669 # Sets the starting symbol and creates the augmented grammar. Production 

1670 # rule 0 is S' -> start where start is the start symbol. 

1671 # ----------------------------------------------------------------------------- 

1672 

1673 def set_start(self, start=None): 

1674 if not start: 

1675 start = self.Productions[1].name 

1676 if start not in self.Nonterminals: 

1677 raise GrammarError("start symbol %s undefined" % start) 

1678 self.Productions[0] = Production(0, "S'", [start]) 

1679 self.Nonterminals[start].append(0) 

1680 self.Start = start 

1681 

1682 # ----------------------------------------------------------------------------- 

1683 # find_unreachable() 

1684 # 

1685 # Find all of the nonterminal symbols that can't be reached from the starting 

1686 # symbol. Returns a list of nonterminals that can't be reached. 

1687 # ----------------------------------------------------------------------------- 

1688 

1689 def find_unreachable(self): 

1690 

1691 # Mark all symbols that are reachable from a symbol s 

1692 def mark_reachable_from(s): 

1693 if s in reachable: 

1694 return 

1695 reachable.add(s) 

1696 for p in self.Prodnames.get(s, []): 

1697 for r in p.prod: 

1698 mark_reachable_from(r) 

1699 

1700 reachable = set() 

1701 mark_reachable_from(self.Productions[0].prod[0]) 

1702 return [s for s in self.Nonterminals if s not in reachable] 

1703 
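# A minimal sketch (hypothetical rules): if the start symbol is 'expr' and
#
#     expr   : expr PLUS expr
#     orphan : NAME
#
# then 'orphan' never appears in any rule reachable from 'expr', so
# find_unreachable() reports it.
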

1704 # ----------------------------------------------------------------------------- 

1705 # infinite_cycles() 

1706 # 

1707 # This function looks at the various parsing rules and tries to detect 

1708 # infinite recursion cycles (grammar rules where there is no possible way 

1709 # to derive a string of only terminals). 

1710 # ----------------------------------------------------------------------------- 

1711 

1712 def infinite_cycles(self): 

1713 terminates = {} 

1714 

1715 # Terminals: 

1716 for t in self.Terminals: 

1717 terminates[t] = True 

1718 

1719 terminates["$end"] = True 

1720 

1721 # Nonterminals: 

1722 

1723 # Initialize to false: 

1724 for n in self.Nonterminals: 

1725 terminates[n] = False 

1726 

1727 # Then propagate termination until no change: 

1728 while True: 

1729 some_change = False 

1730 for (n, pl) in self.Prodnames.items(): 

1731 # Nonterminal n terminates iff any of its productions terminates. 

1732 for p in pl: 

1733 # Production p terminates iff all of its rhs symbols terminate. 

1734 for s in p.prod: 

1735 if not terminates[s]: 

1736 # The symbol s does not terminate, 

1737 # so production p does not terminate. 

1738 p_terminates = False 

1739 break 

1740 else: 

1741 # didn't break from the loop, 

1742 # so every symbol s terminates 

1743 # so production p terminates. 

1744 p_terminates = True 

1745 

1746 if p_terminates: 

1747 # symbol n terminates! 

1748 if not terminates[n]: 

1749 terminates[n] = True 

1750 some_change = True 

1751 # Don't need to consider any more productions for this n. 

1752 break 

1753 

1754 if not some_change: 

1755 break 

1756 

1757 infinite = [] 

1758 for (s, term) in terminates.items(): 

1759 if not term: 

1760 if s not in self.Prodnames and s not in self.Terminals and s != "error": 

1761 # s is used-but-not-defined, and we've already warned of that, 

1762 # so it would be overkill to say that it's also non-terminating. 

1763 pass 

1764 else: 

1765 infinite.append(s) 

1766 

1767 return infinite 

1768 
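# A minimal sketch (hypothetical rule): given only
#
#     a : a TOK
#
# every production for 'a' contains 'a' itself, so terminates['a'] can
# never become True and infinite_cycles() returns ['a'].
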

1769 # ----------------------------------------------------------------------------- 

1770 # undefined_symbols() 

1771 # 

1772 # Find all symbols that were used in the grammar, but not defined as tokens or 

1773 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol 

1774 # and prod is the production where the symbol was used. 

1775 # ----------------------------------------------------------------------------- 

1776 def undefined_symbols(self): 

1777 result = [] 

1778 for p in self.Productions: 

1779 if not p: 

1780 continue 

1781 

1782 for s in p.prod: 

1783 if s not in self.Prodnames and s not in self.Terminals and s != "error": 

1784 result.append((s, p)) 

1785 return result 

1786 

1787 # ----------------------------------------------------------------------------- 

1788 # unused_terminals() 

1789 # 

1790 # Find all terminals that were defined, but not used by the grammar. Returns 

1791 # a list of the unused terminal symbols. 

1792 # ----------------------------------------------------------------------------- 

1793 def unused_terminals(self): 

1794 unused_tok = [] 

1795 for s, v in self.Terminals.items(): 

1796 if s != "error" and not v: 

1797 unused_tok.append(s) 

1798 

1799 return unused_tok 

1800 

1801 # ------------------------------------------------------------------------------ 

1802 # unused_rules() 

1803 # 

1804 # Find all grammar rules that were defined, but not used (maybe not reachable) 

1805 # Returns a list of productions. 

1806 # ------------------------------------------------------------------------------ 

1807 

1808 def unused_rules(self): 

1809 unused_prod = [] 

1810 for s, v in self.Nonterminals.items(): 

1811 if not v: 

1812 p = self.Prodnames[s][0] 

1813 unused_prod.append(p) 

1814 return unused_prod 

1815 

1816 # ----------------------------------------------------------------------------- 

1817 # unused_precedence() 

1818 # 

1819 # Returns a list of tuples (term,precedence) corresponding to precedence 

1820 # rules that were never used by the grammar. term is the name of the terminal 

1821 # on which precedence was applied and precedence is a string such as 'left' or 

1822 # 'right' corresponding to the type of precedence. 

1823 # ----------------------------------------------------------------------------- 

1824 

1825 def unused_precedence(self): 

1826 unused = [] 

1827 for termname in self.Precedence: 

1828 if not (termname in self.Terminals or termname in self.UsedPrecedence): 

1829 unused.append((termname, self.Precedence[termname][0])) 

1830 

1831 return unused 

1832 

1833 # ------------------------------------------------------------------------- 

1834 # _first() 

1835 # 

1836 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 

1837 # 

1838 # During execution of compute_first(), the result may be incomplete. 

1839 # Afterward (e.g., when called from compute_follow()), it will be complete. 

1840 # ------------------------------------------------------------------------- 

1841 def _first(self, beta): 

1842 

1843 # We are computing First(x1,x2,x3,...,xn) 

1844 result = [] 

1845 for x in beta: 

1846 x_produces_empty = False 

1847 

1848 # Add all the non-<empty> symbols of First[x] to the result. 

1849 for f in self.First[x]: 

1850 if f == "<empty>": 

1851 x_produces_empty = True 

1852 else: 

1853 if f not in result: 

1854 result.append(f) 

1855 

1856 if x_produces_empty: 

1857 # We have to consider the next x in beta, 

1858 # i.e. stay in the loop. 

1859 pass 

1860 else: 

1861 # We don't have to consider any further symbols in beta. 

1862 break 

1863 else: 

1864 # There was no 'break' from the loop, 

1865 # so x_produces_empty was true for all x in beta, 

1866 # so beta produces empty as well. 

1867 result.append("<empty>") 

1868 

1869 return result 

1870 
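# Worked sketch (hypothetical rules): with  a : <empty> | X  and First[a]
# already computed as ['X', '<empty>'], _first(('a', 'Y')) first collects
# 'X'; because 'a' can produce empty, the loop goes on to consult
# First['Y'], giving ['X', 'Y'] -- and no '<empty>', since 'Y' itself is
# not nullable.
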

1871 # ------------------------------------------------------------------------- 

1872 # compute_first() 

1873 # 

1874 # Compute the value of FIRST1(X) for all symbols 

1875 # ------------------------------------------------------------------------- 

1876 def compute_first(self): 

1877 if self.First: 

1878 return self.First 

1879 

1880 # Terminals: 

1881 for t in self.Terminals: 

1882 self.First[t] = [t] 

1883 

1884 self.First["$end"] = ["$end"] 

1885 

1886 # Nonterminals: 

1887 

1888 # Initialize to the empty set: 

1889 for n in self.Nonterminals: 

1890 self.First[n] = [] 

1891 

1892 # Then propagate symbols until no change: 

1893 while True: 

1894 some_change = False 

1895 for n in self.Nonterminals: 

1896 for p in self.Prodnames[n]: 

1897 for f in self._first(p.prod): 

1898 if f not in self.First[n]: 

1899 self.First[n].append(f) 

1900 some_change = True 

1901 if not some_change: 

1902 break 

1903 

1904 return self.First 

1905 

1906 # --------------------------------------------------------------------- 

1907 # compute_follow() 

1908 # 

1909 # Computes all of the follow sets for every non-terminal symbol. The 

1910 # follow set is the set of all symbols that might follow a given 

1911 # non-terminal. See the Dragon book, 2nd Ed. p. 189. 

1912 # --------------------------------------------------------------------- 

1913 def compute_follow(self, start=None): 

1914 # If already computed, return the result 

1915 if self.Follow: 

1916 return self.Follow 

1917 

1918 # If first sets not computed yet, do that first. 

1919 if not self.First: 

1920 self.compute_first() 

1921 

1922 # Add '$end' to the follow list of the start symbol 

1923 for k in self.Nonterminals: 

1924 self.Follow[k] = [] 

1925 

1926 if not start: 

1927 start = self.Productions[1].name 

1928 

1929 self.Follow[start] = ["$end"] 

1930 

1931 while True: 

1932 didadd = False 

1933 for p in self.Productions[1:]: 

1934 # Here is the production set 

1935 for i, B in enumerate(p.prod): 

1936 if B in self.Nonterminals: 

1937 # Okay. We got a non-terminal in a production 

1938 fst = self._first(p.prod[i + 1 :]) 

1939 hasempty = False 

1940 for f in fst: 

1941 if f != "<empty>" and f not in self.Follow[B]: 

1942 self.Follow[B].append(f) 

1943 didadd = True 

1944 if f == "<empty>": 

1945 hasempty = True 

1946 if hasempty or i == (len(p.prod) - 1): 

1947 # Add elements of Follow(p.name) to Follow(B) 

1948 for f in self.Follow[p.name]: 

1949 if f not in self.Follow[B]: 

1950 self.Follow[B].append(f) 

1951 didadd = True 

1952 if not didadd: 

1953 break 

1954 return self.Follow 

1955 
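# Worked sketch (hypothetical grammar): for  E : E PLUS T | T  with start
# symbol E, the loop adds PLUS to Follow['E'] (PLUS follows the inner E
# in the first rule) on top of the initial '$end', giving
# ['$end', 'PLUS']; T, sitting at the end of both rules, inherits all of
# Follow['E'].
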

1956 # ----------------------------------------------------------------------------- 

1957 # build_lritems() 

1958 # 

1959 # This function walks the list of productions and builds a complete set of the 

1960 # LR items. The LR items are stored in two ways: First, they are uniquely 

1961 # numbered and placed in the list _lritems. Second, a linked list of LR items 

1962 # is built for each production. For example: 

1963 # 

1964 # E -> E PLUS E 

1965 # 

1966 # Creates the list 

1967 # 

1968 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 

1969 # ----------------------------------------------------------------------------- 

1970 

1971 def build_lritems(self): 

1972 for p in self.Productions: 

1973 lastlri = p 

1974 i = 0 

1975 lr_items = [] 

1976 while True: 

1977 if i > len(p): 

1978 lri = None 

1979 else: 

1980 lri = LRItem(p, i) 

1981 # Precompute the list of productions immediately following 

1982 try: 

1983 lri.lr_after = self.Prodnames[lri.prod[i + 1]] 

1984 except (IndexError, KeyError): 

1985 lri.lr_after = [] 

1986 try: 

1987 lri.lr_before = lri.prod[i - 1] 

1988 except IndexError: 

1989 lri.lr_before = None 

1990 

1991 lastlri.lr_next = lri 

1992 if not lri: 

1993 break 

1994 lr_items.append(lri) 

1995 lastlri = lri 

1996 i += 1 

1997 p.lr_items = lr_items 

1998 

1999 

2000# ----------------------------------------------------------------------------- 

2001# == Class LRTable == 

2002# 

2003 # This class represents a basic table of LR parsing information. 

2004# Methods for generating the tables are not defined here. They are defined 

2005# in the derived class LRGeneratedTable. 

2006# ----------------------------------------------------------------------------- 

2007 

2008 

2009class VersionError(YaccError): 

2010 pass 

2011 

2012 

2013class LRTable(object): 

2014 def __init__(self): 

2015 self.lr_action = None 

2016 self.lr_goto = None 

2017 self.lr_productions = None 

2018 self.lr_method = None 

2019 

2020 def read_table(self, module): 

2021 if isinstance(module, types.ModuleType): 

2022 parsetab = module 

2023 else: 

2024 exec("import %s" % module) 

2025 parsetab = sys.modules[module] 

2026 

2027 if parsetab._tabversion != __tabversion__: 

2028 raise VersionError("yacc table file version is out of date") 

2029 

2030 self.lr_action = parsetab._lr_action 

2031 self.lr_goto = parsetab._lr_goto 

2032 

2033 self.lr_productions = [] 

2034 for p in parsetab._lr_productions: 

2035 self.lr_productions.append(MiniProduction(*p)) 

2036 

2037 self.lr_method = parsetab._lr_method 

2038 return parsetab._lr_signature 

2039 

2040 def read_pickle(self, filename): 

2041 try: 

2042 import cPickle as pickle 

2043 except ImportError: 

2044 import pickle 

2045 

2046 if not os.path.exists(filename): 

2047 raise ImportError 

2048 

2049 in_f = open(filename, "rb") 

2050 

2051 tabversion = pickle.load(in_f) 

2052 if tabversion != __tabversion__: 

2053 raise VersionError("yacc table file version is out of date") 

2054 self.lr_method = pickle.load(in_f) 

2055 signature = pickle.load(in_f) 

2056 self.lr_action = pickle.load(in_f) 

2057 self.lr_goto = pickle.load(in_f) 

2058 productions = pickle.load(in_f) 

2059 

2060 self.lr_productions = [] 

2061 for p in productions: 

2062 self.lr_productions.append(MiniProduction(*p)) 

2063 

2064 in_f.close() 

2065 return signature 

2066 

2067 # Bind all production function names to callable objects in pdict 

2068 def bind_callables(self, pdict): 

2069 for p in self.lr_productions: 

2070 p.bind(pdict) 

2071 

2072 

2073# ----------------------------------------------------------------------------- 

2074# === LR Generator === 

2075# 

2076# The following classes and functions are used to generate LR parsing tables on 

2077# a grammar. 

2078# ----------------------------------------------------------------------------- 

2079 

2080# ----------------------------------------------------------------------------- 

2081# digraph() 

2082# traverse() 

2083# 

2084# The following two functions are used to compute set valued functions 

2085# of the form: 

2086# 

2087# F(x) = F'(x) U U{F(y) | x R y} 

2088# 

2089# This is used to compute the values of Read() sets as well as FOLLOW sets 

2090# in LALR(1) generation. 

2091# 

2092# Inputs: X - An input set 

2093# R - A relation 

2094# FP - Set-valued function 

2095# ------------------------------------------------------------------------------ 

2096 

2097 

2098def digraph(X, R, FP): 

2099 N = {} 

2100 for x in X: 

2101 N[x] = 0 

2102 stack = [] 

2103 F = {} 

2104 for x in X: 

2105 if N[x] == 0: 

2106 traverse(x, N, stack, F, X, R, FP) 

2107 return F 

2108 

2109 

2110def traverse(x, N, stack, F, X, R, FP): 

2111 stack.append(x) 

2112 d = len(stack) 

2113 N[x] = d 

2114 F[x] = FP(x) # F(X) <- F'(x) 

2115 

2116 rel = R(x) # Get y's related to x 

2117 for y in rel: 

2118 if N[y] == 0: 

2119 traverse(y, N, stack, F, X, R, FP) 

2120 N[x] = min(N[x], N[y]) 

2121 for a in F.get(y, []): 

2122 if a not in F[x]: 

2123 F[x].append(a) 

2124 if N[x] == d: 

2125 N[stack[-1]] = MAXINT 

2126 F[stack[-1]] = F[x] 

2127 element = stack.pop() 

2128 while element != x: 

2129 N[stack[-1]] = MAXINT 

2130 F[stack[-1]] = F[x] 

2131 element = stack.pop() 

2132 
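# A self-contained sketch of digraph() on a toy relation (the helper name
# below is illustrative, not part of PLY).  With F'(1) = ['a'],
# F'(2) = ['b'] and 1 R 2, the result must satisfy F(1) = F'(1) U F(2).
def _digraph_example():
    X = [1, 2]
    R = lambda x: [2] if x == 1 else []        # 1 is related to 2
    FP = lambda x: ["a"] if x == 1 else ["b"]  # F'(x)
    return digraph(X, R, FP)  # -> {1: ['a', 'b'], 2: ['b']}
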

2133 

2134class LALRError(YaccError): 

2135 pass 

2136 

2137 

2138# ----------------------------------------------------------------------------- 

2139# == LRGeneratedTable == 

2140# 

2141# This class implements the LR table generation algorithm. There are no 

2142 # public methods except for write_table() 

2143# ----------------------------------------------------------------------------- 

2144 

2145 

2146class LRGeneratedTable(LRTable): 

2147 def __init__(self, grammar, method="LALR", log=None): 

2148 if method not in ["SLR", "LALR"]: 

2149 raise LALRError("Unsupported method %s" % method) 

2150 

2151 self.grammar = grammar 

2152 self.lr_method = method 

2153 

2154 # Set up the logger 

2155 if not log: 

2156 log = NullLogger() 

2157 self.log = log 

2158 

2159 # Internal attributes 

2160 self.lr_action = {} # Action table 

2161 self.lr_goto = {} # Goto table 

2162 self.lr_productions = grammar.Productions # Copy of grammar Production array 

2163 self.lr_goto_cache = {} # Cache of computed gotos 

2164 self.lr0_cidhash = {} # Cache of closures 

2165 

2166 self._add_count = 0 # Internal counter used to detect cycles 

2167 

2168 # Diagnostic information filled in by the table generator 

2169 self.sr_conflict = 0 

2170 self.rr_conflict = 0 

2171 self.conflicts = [] # List of conflicts 

2172 

2173 self.sr_conflicts = [] 

2174 self.rr_conflicts = [] 

2175 

2176 # Build the tables 

2177 self.grammar.build_lritems() 

2178 self.grammar.compute_first() 

2179 self.grammar.compute_follow() 

2180 self.lr_parse_table() 

2181 

2182 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 

2183 

2184 def lr0_closure(self, I): 

2185 self._add_count += 1 

2186 

2187 # Add everything in I to J 

2188 J = I[:] 

2189 didadd = True 

2190 while didadd: 

2191 didadd = False 

2192 for j in J: 

2193 for x in j.lr_after: 

2194 if getattr(x, "lr0_added", 0) == self._add_count: 

2195 continue 

2196 # Add B --> .G to J 

2197 J.append(x.lr_next) 

2198 x.lr0_added = self._add_count 

2199 didadd = True 

2200 

2201 return J 

2202 

2203 # Compute the LR(0) goto function goto(I,X) where I is a set 

2204 # of LR(0) items and X is a grammar symbol. This function is written 

2205 # in a way that guarantees uniqueness of the generated goto sets 

2206 # (i.e. the same goto set will never be returned as two different Python 

2207 # objects). With uniqueness, we can later do fast set comparisons using 

2208 # id(obj) instead of element-wise comparison. 

2209 

2210 def lr0_goto(self, I, x): 

2211 # First we look for a previously cached entry 

2212 g = self.lr_goto_cache.get((id(I), x)) 

2213 if g: 

2214 return g 

2215 

2216 # Now we generate the goto set in a way that guarantees uniqueness 

2217 # of the result 

2218 

2219 s = self.lr_goto_cache.get(x) 

2220 if not s: 

2221 s = {} 

2222 self.lr_goto_cache[x] = s 

2223 

2224 gs = [] 

2225 for p in I: 

2226 n = p.lr_next 

2227 if n and n.lr_before == x: 

2228 s1 = s.get(id(n)) 

2229 if not s1: 

2230 s1 = {} 

2231 s[id(n)] = s1 

2232 gs.append(n) 

2233 s = s1 

2234 g = s.get("$end") 

2235 if not g: 

2236 if gs: 

2237 g = self.lr0_closure(gs) 

2238 s["$end"] = g 

2239 else: 

2240 s["$end"] = gs 

2241 self.lr_goto_cache[(id(I), x)] = g 

2242 return g 

2243 

2244 # Compute the LR(0) sets-of-items function 

2245 def lr0_items(self): 

2246 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] 

2247 i = 0 

2248 for I in C: 

2249 self.lr0_cidhash[id(I)] = i 

2250 i += 1 

2251 

2252 # Loop over the items in C and each grammar symbol 

2253 i = 0 

2254 while i < len(C): 

2255 I = C[i] 

2256 i += 1 

2257 

2258 # Collect all of the symbols that could possibly be in the goto(I,X) sets 

2259 asyms = {} 

2260 for ii in I: 

2261 for s in ii.usyms: 

2262 asyms[s] = None 

2263 

2264 for x in asyms: 

2265 g = self.lr0_goto(I, x) 

2266 if not g or id(g) in self.lr0_cidhash: 

2267 continue 

2268 self.lr0_cidhash[id(g)] = len(C) 

2269 C.append(g) 

2270 

2271 return C 

2272 

2273 # ----------------------------------------------------------------------------- 

2274 # ==== LALR(1) Parsing ==== 

2275 # 

2276 # LALR(1) parsing is almost exactly the same as SLR except that instead of 

2277 # relying upon Follow() sets when performing reductions, a more selective 

2278 # lookahead set that incorporates the state of the LR(0) machine is utilized. 

2279 # Thus, we mainly just have to focus on calculating the lookahead sets. 

2280 # 

2281 # The method used here is due to DeRemer and Pennello (1982). 

2282 # 

2283 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) 

2284 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 

2285 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 

2286 # 

2287 # Further details can also be found in: 

2288 # 

2289 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 

2290 # McGraw-Hill Book Company, (1985). 

2291 # 

2292 # ----------------------------------------------------------------------------- 

2293 

2294 # ----------------------------------------------------------------------------- 

2295 # compute_nullable_nonterminals() 

2296 # 

2297 # Creates a dictionary containing all of the non-terminals that might produce 

2298 # an empty production. 

2299 # ----------------------------------------------------------------------------- 

2300 

2301 def compute_nullable_nonterminals(self): 

2302 nullable = set() 

2303 num_nullable = 0 

2304 while True: 

2305 for p in self.grammar.Productions[1:]: 

2306 if p.len == 0: 

2307 nullable.add(p.name) 

2308 continue 

2309 for t in p.prod: 

2310 if t not in nullable: 

2311 break 

2312 else: 

2313 nullable.add(p.name) 

2314 if len(nullable) == num_nullable: 

2315 break 

2316 num_nullable = len(nullable) 

2317 return nullable 

2318 
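# A minimal sketch (hypothetical rules): with
#
#     a : <empty>
#     b : a a
#     c : b X
#
# 'a' enters the set via its empty production, 'b' follows once both of
# its symbols are known nullable, and 'c' never enters because the
# terminal X cannot derive empty; the result is {'a', 'b'}.
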

2319 # ----------------------------------------------------------------------------- 

2320 # find_nonterminal_trans(C) 

2321 # 

2322 # Given a set of LR(0) items, this function finds all of the non-terminal 

2323 # transitions. These are transitions in which a dot appears immediately before 

2324 # a non-terminal. Returns a list of tuples of the form (state,N) where state 

2325 # is the state number and N is the nonterminal symbol. 

2326 # 

2327 # The input C is the set of LR(0) items. 

2328 # ----------------------------------------------------------------------------- 

2329 

2330 def find_nonterminal_transitions(self, C): 

2331 trans = [] 

2332 for stateno, state in enumerate(C): 

2333 for p in state: 

2334 if p.lr_index < p.len - 1: 

2335 t = (stateno, p.prod[p.lr_index + 1]) 

2336 if t[1] in self.grammar.Nonterminals: 

2337 if t not in trans: 

2338 trans.append(t) 

2339 return trans 

2340 

2341 # ----------------------------------------------------------------------------- 

2342 # dr_relation() 

2343 # 

2344 # Computes the DR(p,A) relationships for non-terminal transitions. The input 

2345 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 

2346 # 

2347 # Returns a list of terminals. 

2348 # ----------------------------------------------------------------------------- 

2349 

2350 def dr_relation(self, C, trans, nullable): 

2351 state, N = trans 

2352 terms = [] 

2353 

2354 g = self.lr0_goto(C[state], N) 

2355 for p in g: 

2356 if p.lr_index < p.len - 1: 

2357 a = p.prod[p.lr_index + 1] 

2358 if a in self.grammar.Terminals: 

2359 if a not in terms: 

2360 terms.append(a) 

2361 

2362 # This extra bit is to handle the start state 

2363 if state == 0 and N == self.grammar.Productions[0].prod[0]: 

2364 terms.append("$end") 

2365 

2366 return terms 

2367 

2368 # ----------------------------------------------------------------------------- 

2369 # reads_relation() 

2370 # 

2371 # Computes the READS() relation (p,A) READS (t,C). 

2372 # ----------------------------------------------------------------------------- 

2373 

2374 def reads_relation(self, C, trans, empty): 

2375 # Look for empty transitions 

2376 rel = [] 

2377 state, N = trans 

2378 

2379 g = self.lr0_goto(C[state], N) 

2380 j = self.lr0_cidhash.get(id(g), -1) 

2381 for p in g: 

2382 if p.lr_index < p.len - 1: 

2383 a = p.prod[p.lr_index + 1] 

2384 if a in empty: 

2385 rel.append((j, a)) 

2386 

2387 return rel 

2388 

2389 # ----------------------------------------------------------------------------- 

2390 # compute_lookback_includes() 

2391 # 

2392 # Determines the lookback and includes relations 

2393 # 

2394 # LOOKBACK: 

2395 # 

2396 # This relation is determined by running the LR(0) state machine forward. 

2397 # For example, starting with a production "N : . A B C", we run it forward 

2398 # to obtain "N : A B C ." We then build a relationship between this final 

2399 # state and the starting state. These relationships are stored in a dictionary 

2400 # lookdict. 

2401 # 

2402 # INCLUDES: 

2403 # 

2404 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 

2405 # 

2406 # This relation is used to determine non-terminal transitions that occur 

2407 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 

2408 # if the following holds: 

2409 # 

2410 # B -> LAT, where T -> epsilon and p' -L-> p 

2411 # 

2412 # L is essentially a prefix (which may be empty), T is a suffix that must be 

2413 # able to derive an empty string. State p' must lead to state p with the string L. 

2414 # 

2415 # ----------------------------------------------------------------------------- 

2416 

2417 def compute_lookback_includes(self, C, trans, nullable): 

2418 lookdict = {} # Dictionary of lookback relations 

2419 includedict = {} # Dictionary of include relations 

2420 

2421 # Make a dictionary of non-terminal transitions 

2422 dtrans = {} 

2423 for t in trans: 

2424 dtrans[t] = 1 

2425 

2426 # Loop over all transitions and compute lookbacks and includes 

2427 for state, N in trans: 

2428 lookb = [] 

2429 includes = [] 

2430 for p in C[state]: 

2431 if p.name != N: 

2432 continue 

2433 

2434 # Okay, we have a name match. We now follow the production all the way 

2435 # through the state machine until we get the . on the right hand side 

2436 

2437 lr_index = p.lr_index 

2438 j = state 

2439 while lr_index < p.len - 1: 

2440 lr_index = lr_index + 1 

2441 t = p.prod[lr_index] 

2442 

2443 # Check to see if this symbol and state are a non-terminal transition 

2444 if (j, t) in dtrans: 

2445 # Yes. Okay, there is some chance that this is an includes relation 

2446 # the only way to know for certain is whether the rest of the 

2447 # production derives empty 

2448 

2449 li = lr_index + 1 

2450 while li < p.len: 

2451 if p.prod[li] in self.grammar.Terminals: 

2452 break # No, forget it 

2453 if p.prod[li] not in nullable: 

2454 break 

2455 li = li + 1 

2456 else: 

2457 # Appears to be a relation between (j,t) and (state,N) 

2458 includes.append((j, t)) 

2459 

2460 g = self.lr0_goto(C[j], t) # Go to next set 

2461 j = self.lr0_cidhash.get(id(g), -1) # Go to next state 

2462 

2463 # When we get here, j is the final state, now we have to locate the production 

2464 for r in C[j]: 

2465 if r.name != p.name: 

2466 continue 

2467 if r.len != p.len: 

2468 continue 

2469 i = 0 

2470 # This loop is comparing a production ". A B C" with "A B C ." 

2471 while i < r.lr_index: 

2472 if r.prod[i] != p.prod[i + 1]: 

2473 break 

2474 i = i + 1 

2475 else: 

2476 lookb.append((j, r)) 

2477 for i in includes: 

2478 if i not in includedict: 

2479 includedict[i] = [] 

2480 includedict[i].append((state, N)) 

2481 lookdict[(state, N)] = lookb 

2482 

2483 return lookdict, includedict 

2484 
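# A minimal sketch of the INCLUDES condition above (hypothetical rules):
# in
#
#     b : X a c          (where  c : <empty>)
#
# the transition made on 'a' while scanning this rule INCLUDES the
# transition on 'b', because everything after 'a' (just the nullable 'c')
# can derive empty -- so lookaheads valid after 'b' are also valid
# after 'a'.
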

2485 # ----------------------------------------------------------------------------- 

2486 # compute_read_sets() 

2487 # 

2488 # Given a set of LR(0) items, this function computes the read sets. 

2489 # 

2490 # Inputs: C = Set of LR(0) items 

2491 # ntrans = Set of nonterminal transitions 

2492 # nullable = Set of empty transitions 

2493 # 

2494 # Returns a set containing the read sets 

2495 # ----------------------------------------------------------------------------- 

2496 

2497 def compute_read_sets(self, C, ntrans, nullable): 

2498 FP = lambda x: self.dr_relation(C, x, nullable) 

2499 R = lambda x: self.reads_relation(C, x, nullable) 

2500 F = digraph(ntrans, R, FP) 

2501 return F 

2502 

2503 # ----------------------------------------------------------------------------- 

2504 # compute_follow_sets() 

2505 # 

2506 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 

2507 # and an include set, this function computes the follow sets 

2508 # 

2509 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 

2510 # 

2511 # Inputs: 

2512 # ntrans = Set of nonterminal transitions 

2513 # readsets = Readset (previously computed) 

2514 # inclsets = Include sets (previously computed) 

2515 # 

2516 # Returns a set containing the follow sets 

2517 # ----------------------------------------------------------------------------- 

2518 

2519 def compute_follow_sets(self, ntrans, readsets, inclsets): 

2520 FP = lambda x: readsets[x] 

2521 R = lambda x: inclsets.get(x, []) 

2522 F = digraph(ntrans, R, FP) 

2523 return F 

2524 

2525 # ----------------------------------------------------------------------------- 

2526 # add_lookaheads() 

2527 # 

2528 # Attaches the lookahead symbols to grammar rules. 

2529 # 

2530 # Inputs: lookbacks - Set of lookback relations 

2531 # followset - Computed follow set 

2532 # 

2533 # This function directly attaches the lookaheads to productions contained 

2534 # in the lookbacks set 

2535 # ----------------------------------------------------------------------------- 

2536 

2537 def add_lookaheads(self, lookbacks, followset): 

2538 for trans, lb in lookbacks.items(): 

2539 # Loop over productions in lookback 

2540 for state, p in lb: 

2541 if state not in p.lookaheads: 

2542 p.lookaheads[state] = [] 

2543 f = followset.get(trans, []) 

2544 for a in f: 

2545 if a not in p.lookaheads[state]: 

2546 p.lookaheads[state].append(a) 

2547 

2548 # ----------------------------------------------------------------------------- 

2549 # add_lalr_lookaheads() 

2550 # 

2551 # This function does all of the work of adding lookahead information for use 

2552 # with LALR parsing 

2553 # ----------------------------------------------------------------------------- 

2554 

2555 def add_lalr_lookaheads(self, C): 

2556 # Determine all of the nullable nonterminals 

2557 nullable = self.compute_nullable_nonterminals() 

2558 

2559 # Find all non-terminal transitions 

2560 trans = self.find_nonterminal_transitions(C) 

2561 

2562 # Compute read sets 

2563 readsets = self.compute_read_sets(C, trans, nullable) 

2564 

2565 # Compute lookback/includes relations 

2566 lookd, included = self.compute_lookback_includes(C, trans, nullable) 

2567 

2568 # Compute LALR FOLLOW sets 

2569 followsets = self.compute_follow_sets(trans, readsets, included) 

2570 

2571 # Add all of the lookaheads 

2572 self.add_lookaheads(lookd, followsets) 

2573 

2574 # ----------------------------------------------------------------------------- 

2575 # lr_parse_table() 

2576 # 

2577 # This function constructs the parse tables for SLR or LALR 

2578 # ----------------------------------------------------------------------------- 

2579 def lr_parse_table(self): 

2580 Productions = self.grammar.Productions 

2581 Precedence = self.grammar.Precedence 

2582 goto = self.lr_goto # Goto array 

2583 action = self.lr_action # Action array 

2584 log = self.log # Logger for output 

2585 

2586 actionp = {} # Action production array (temporary) 

2587 

2588 log.info("Parsing method: %s", self.lr_method) 

2589 

2590 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 

2591 # This determines the number of states 

2592 

2593 C = self.lr0_items() 

2594 

2595 if self.lr_method == "LALR": 

2596 self.add_lalr_lookaheads(C) 

2597 

2598 # Build the parser table, state by state 

2599 st = 0 

2600 for I in C: 

2601 # Loop over each production in I 

2602 actlist = [] # List of actions 

2603 st_action = {} 

2604 st_actionp = {} 

2605 st_goto = {} 

2606 log.info("") 

2607 log.info("state %d", st) 

2608 log.info("") 

2609 for p in I: 

2610 log.info(" (%d) %s", p.number, p) 

2611 log.info("") 

2612 

2613 for p in I: 

2614 if p.len == p.lr_index + 1: 

2615 if p.name == "S'": 

2616 # Start symbol. Accept! 

2617 st_action["$end"] = 0 

2618 st_actionp["$end"] = p 

2619 else: 

2620 # We are at the end of a production. Reduce! 

2621 if self.lr_method == "LALR": 

2622 laheads = p.lookaheads[st] 

2623 else: 

2624 laheads = self.grammar.Follow[p.name] 

2625 for a in laheads: 

2626 actlist.append((a, p, "reduce using rule %d (%s)" % (p.number, p))) 

2627 r = st_action.get(a) 

2628 if r is not None: 

2629 # Whoa. Have a shift/reduce or reduce/reduce conflict 

2630 if r > 0: 

2631 # Need to decide on shift or reduce here 

2632 # By default we favor shifting, unless the 

2633 # precedence rules below say otherwise. 

2634 

2635 # Shift precedence comes from the token 

2636 sprec, slevel = Precedence.get(a, ("right", 0)) 

2637 

2638 # Reduce precedence comes from rule being reduced (p) 

2639 rprec, rlevel = Productions[p.number].prec 

2640 

2641 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == "left")): 

2642 # We really need to reduce here. 

2643 st_action[a] = -p.number 

2644 st_actionp[a] = p 

2645 if not slevel and not rlevel: 

2646 log.info(" ! shift/reduce conflict for %s resolved as reduce", a) 

2647 self.sr_conflicts.append((st, a, "reduce")) 

2648 Productions[p.number].reduced += 1 

2649 elif (slevel == rlevel) and (rprec == "nonassoc"): 

2650 st_action[a] = None 

2651 else: 

2652 # Hmmm. Guess we'll keep the shift 

2653 if not rlevel: 

2654 log.info(" ! shift/reduce conflict for %s resolved as shift", a) 

2655 self.sr_conflicts.append((st, a, "shift")) 

2656 elif r < 0: 

2657 # Reduce/reduce conflict. In this case, we favor the rule 

2658 # that was defined first in the grammar file 

2659 oldp = Productions[-r] 

2660 pp = Productions[p.number] 

2661 if oldp.line > pp.line: 

2662 st_action[a] = -p.number 

2663 st_actionp[a] = p 

2664 chosenp, rejectp = pp, oldp 

2665 Productions[p.number].reduced += 1 

2666 Productions[oldp.number].reduced -= 1 

2667 else: 

2668 chosenp, rejectp = oldp, pp 

2669 self.rr_conflicts.append((st, chosenp, rejectp)) 

2670 log.info( 

2671 " ! reduce/reduce conflict for %s resolved using rule %d (%s)", 

2672 a, 

2673 st_actionp[a].number, 

2674 st_actionp[a], 

2675 ) 

2676 else: 

2677 raise LALRError("Unknown conflict in state %d" % st) 

2678 else: 

2679 st_action[a] = -p.number 

2680 st_actionp[a] = p 

2681 Productions[p.number].reduced += 1 

2682 else: 

2683 i = p.lr_index 

2684 a = p.prod[i + 1] # Get symbol right after the "." 

2685 if a in self.grammar.Terminals: 

2686 g = self.lr0_goto(I, a) 

2687 j = self.lr0_cidhash.get(id(g), -1) 

2688 if j >= 0: 

2689 # We are in a shift state 

2690 actlist.append((a, p, "shift and go to state %d" % j)) 

2691 r = st_action.get(a) 

2692 if r is not None: 

2693 # Whoa. Have a shift/reduce or shift/shift conflict 

2694 if r > 0: 

2695 if r != j: 

2696 raise LALRError("Shift/shift conflict in state %d" % st) 

2697 elif r < 0: 

2698 # Do a precedence check. 

2699 # - if precedence of reduce rule is higher, we reduce. 

2700 # - if precedence of reduce is same and left assoc, we reduce. 

2701 # - otherwise we shift 

2702 

2703 # Shift precedence comes from the token 

2704 sprec, slevel = Precedence.get(a, ("right", 0)) 

2705 

2706 # Reduce precedence comes from the rule that could have been reduced 

2707 rprec, rlevel = Productions[st_actionp[a].number].prec 

2708 

2709 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == "right")): 

2710 # We decide to shift here... highest precedence to shift 

2711 Productions[st_actionp[a].number].reduced -= 1 

2712 st_action[a] = j 

2713 st_actionp[a] = p 

2714 if not rlevel: 

2715 log.info(" ! shift/reduce conflict for %s resolved as shift", a) 

2716 self.sr_conflicts.append((st, a, "shift")) 

2717 elif (slevel == rlevel) and (rprec == "nonassoc"): 

2718 st_action[a] = None 

2719 else: 

2720 # Hmmm. Guess we'll keep the reduce 

2721 if not slevel and not rlevel: 

2722 log.info(" ! shift/reduce conflict for %s resolved as reduce", a) 

2723 self.sr_conflicts.append((st, a, "reduce")) 

2724 

2725 else: 

2726 raise LALRError("Unknown conflict in state %d" % st) 

2727 else: 

2728 st_action[a] = j 

2729 st_actionp[a] = p 

2730 

2731 # Print the actions associated with each terminal 

2732 _actprint = {} 

2733 for a, p, m in actlist: 

2734 if a in st_action: 

2735 if p is st_actionp[a]: 

2736 log.info(" %-15s %s", a, m) 

2737 _actprint[(a, m)] = 1 

2738 log.info("") 

2739 # Print the actions that were not used. (debugging) 

2740 not_used = 0 

2741 for a, p, m in actlist: 

2742 if a in st_action: 

2743 if p is not st_actionp[a]: 

2744 if (a, m) not in _actprint: 

2745 log.debug(" ! %-15s [ %s ]", a, m) 

2746 not_used = 1 

2747 _actprint[(a, m)] = 1 

2748 if not_used: 

2749 log.debug("") 

2750 

2751 # Construct the goto table for this state 

2752 

2753 nkeys = {} 

2754 for ii in I: 

2755 for s in ii.usyms: 

2756 if s in self.grammar.Nonterminals: 

2757 nkeys[s] = None 

2758 for n in nkeys: 

2759 g = self.lr0_goto(I, n) 

2760 j = self.lr0_cidhash.get(id(g), -1) 

2761 if j >= 0: 

2762 st_goto[n] = j 

2763 log.info(" %-30s shift and go to state %d", n, j) 

2764 

2765 action[st] = st_action 

2766 actionp[st] = st_actionp 

2767 goto[st] = st_goto 

2768 st += 1 

2769 
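# A minimal sketch of the resolution rules above (calc-style names): with
#
#     precedence = (('left', 'PLUS'), ('left', 'TIMES'))
#
# a conflict between shifting TIMES and reducing  expr : expr PLUS expr
# is resolved as shift (TIMES sits on a higher level), while the same
# conflict on PLUS resolves as reduce (equal level, left-associative).
# Tokens absent from the table default to ('right', 0), which favors
# shifting.
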

2770 # ----------------------------------------------------------------------------- 

2771 # write_table() 

2772 # 

2773 # This function writes the LR parsing tables to a file 

2774 # ----------------------------------------------------------------------------- 

2775 

2776 def write_table(self, tabmodule, outputdir="", signature=""): 

2777 if isinstance(tabmodule, types.ModuleType): 

2778 raise IOError("Won't overwrite existing tabmodule") 

2779 

2780 basemodulename = tabmodule.split(".")[-1] 

2781 filename = os.path.join(outputdir, basemodulename) + ".py" 

2782 try: 

2783 f = open(filename, "w") 

2784 

2785 f.write( 

2786 """ 

2787# %s 

2788# This file is automatically generated. Do not edit. 

2789# pylint: disable=W,C,R 

2790_tabversion = %r 

2791 

2792_lr_method = %r 

2793 

2794_lr_signature = %r 

2795 """ 

2796 % (os.path.basename(filename), __tabversion__, self.lr_method, signature) 

2797 ) 

2798 

2799 # Change smaller to 0 to go back to original tables 

2800 smaller = 1 

2801 

2802 # Factor out names to try and make smaller 

2803 if smaller: 

2804 items = {} 

2805 

2806 for s, nd in self.lr_action.items(): 

2807 for name, v in nd.items(): 

2808 i = items.get(name) 

2809 if not i: 

2810 i = ([], []) 

2811 items[name] = i 

2812 i[0].append(s) 

2813 i[1].append(v) 

2814 

2815 f.write("\n_lr_action_items = {") 

2816 for k, v in items.items(): 

2817 f.write("%r:([" % k) 

2818 for i in v[0]: 

2819 f.write("%r," % i) 

2820 f.write("],[") 

2821 for i in v[1]: 

2822 f.write("%r," % i) 

2823 

2824 f.write("]),") 

2825 f.write("}\n") 

2826 

2827 f.write( 

2828 """ 

2829_lr_action = {} 

2830for _k, _v in _lr_action_items.items(): 

2831 for _x,_y in zip(_v[0],_v[1]): 

2832 if not _x in _lr_action: _lr_action[_x] = {} 

2833 _lr_action[_x][_k] = _y 

2834del _lr_action_items 

2835""" 

2836 ) 

2837 

2838 else: 

2839 f.write("\n_lr_action = { ") 

2840 for k, v in self.lr_action.items(): 

2841 f.write("(%r,%r):%r," % (k[0], k[1], v)) 

2842 f.write("}\n") 

2843 

2844 if smaller: 

2845 # Factor out names to try and make smaller 

2846 items = {} 

2847 

2848 for s, nd in self.lr_goto.items(): 

2849 for name, v in nd.items(): 

2850 i = items.get(name) 

2851 if not i: 

2852 i = ([], []) 

2853 items[name] = i 

2854 i[0].append(s) 

2855 i[1].append(v) 

2856 

2857 f.write("\n_lr_goto_items = {") 

2858 for k, v in items.items(): 

2859 f.write("%r:([" % k) 

2860 for i in v[0]: 

2861 f.write("%r," % i) 

2862 f.write("],[") 

2863 for i in v[1]: 

2864 f.write("%r," % i) 

2865 

2866 f.write("]),") 

2867 f.write("}\n") 

2868 

2869 f.write( 

2870 """ 

2871_lr_goto = {} 

2872for _k, _v in _lr_goto_items.items(): 

2873 for _x, _y in zip(_v[0], _v[1]): 

2874 if not _x in _lr_goto: _lr_goto[_x] = {} 

2875 _lr_goto[_x][_k] = _y 

2876del _lr_goto_items 

2877""" 

2878 ) 

2879 else: 

2880 f.write("\n_lr_goto = { ") 

2881 for k, v in self.lr_goto.items(): 

2882 f.write("(%r,%r):%r," % (k[0], k[1], v)) 

2883 f.write("}\n") 

2884 

2885 # Write production table 

2886 f.write("_lr_productions = [\n") 

2887 for p in self.lr_productions: 

2888 if p.func: 

2889 f.write( 

2890 " (%r,%r,%d,%r,%r,%d),\n" 

2891 % (p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line) 

2892 ) 

2893 else: 

2894 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p), p.name, p.len)) 

2895 f.write("]\n") 

2896 f.close() 

2897 

2898 except IOError: 

2899 raise 

2900 

2901 # ----------------------------------------------------------------------------- 

2902 # pickle_table() 

2903 # 

2904 # This function pickles the LR parsing tables to a supplied file object 

2905 # ----------------------------------------------------------------------------- 

2906 

2907 def pickle_table(self, filename, signature=""): 

2908 try: 

2909 import cPickle as pickle 

2910 except ImportError: 

2911 import pickle 

2912 with open(filename, "wb") as outf: 

2913 pickle.dump(__tabversion__, outf, pickle_protocol) 

2914 pickle.dump(self.lr_method, outf, pickle_protocol) 

2915 pickle.dump(signature, outf, pickle_protocol) 

2916 pickle.dump(self.lr_action, outf, pickle_protocol) 

2917 pickle.dump(self.lr_goto, outf, pickle_protocol) 

2918 

2919 outp = [] 

2920 for p in self.lr_productions: 

2921 if p.func: 

2922 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) 

2923 else: 

2924 outp.append((str(p), p.name, p.len, None, None, None)) 

2925 pickle.dump(outp, outf, pickle_protocol) 

2926 
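# Usage sketch (the variable names here are illustrative): after the
# tables are built,
#
#     lr = LRGeneratedTable(grammar, method="LALR")
#     lr.write_table("parsetab", outputdir=".", signature=sig)
#     lr.pickle_table("parser.pickle", signature=sig)
#
# produces an importable parsetab.py and/or a pickled equivalent; both
# carry the _tabversion, method, and signature that read_table() and
# read_pickle() check before the tables are reused.
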

2927 

2928# ----------------------------------------------------------------------------- 

2929# === INTROSPECTION === 

2930# 

2931# The following functions and classes are used to implement the PLY 

2932# introspection features followed by the yacc() function itself. 

2933# ----------------------------------------------------------------------------- 

2934 

2935# ----------------------------------------------------------------------------- 

2936# get_caller_module_dict() 

2937# 

2938# This function returns a dictionary containing all of the symbols defined within 

2939# a caller further down the call stack. This is used to get the environment 

2940# associated with the yacc() call if none was provided. 

2941# ----------------------------------------------------------------------------- 

2942 

2943 

2944def get_caller_module_dict(levels): 

2945 f = sys._getframe(levels) 

2946 ldict = f.f_globals.copy() 

2947 if f.f_globals != f.f_locals: 

2948 ldict.update(f.f_locals) 

2949 return ldict 

2950 
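# A self-contained sketch (the helper name is illustrative, not part of
# PLY): with levels=1 the frame captured is that of the immediate caller,
# so the caller's locals are visible in the returned dictionary.
def _caller_dict_example():
    tokens = ("NUMBER",)  # a local that the lookup should see
    return "tokens" in get_caller_module_dict(1)  # -> True
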

2951 

2952# ----------------------------------------------------------------------------- 

2953# parse_grammar() 

2954# 

2955# This takes a raw grammar rule string and parses it into production data 

2956# ----------------------------------------------------------------------------- 

2957def parse_grammar(doc, file, line): 

2958 grammar = [] 

2959 # Split the doc string into lines 

2960 pstrings = doc.splitlines() 

2961 lastp = None 

2962 dline = line 

2963 for ps in pstrings: 

2964 dline += 1 

2965 p = ps.split() 

2966 if not p: 

2967 continue 

2968 try: 

2969 if p[0] == "|": 

2970 # This is a continuation of a previous rule 

2971 if not lastp: 

2972 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 

2973 prodname = lastp 

2974 syms = p[1:] 

2975 else: 

2976 prodname = p[0] 

2977 lastp = prodname 

2978 syms = p[2:] 

2979 assign = p[1] 

2980 if assign != ":" and assign != "::=": 

2981 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) 

2982 

2983 grammar.append((file, dline, prodname, syms)) 

2984 except SyntaxError: 

2985 raise 

2986 except Exception: 

2987 raise SyntaxError("%s:%d: Syntax error in rule %r" % (file, dline, ps.strip())) 

2988 

2989 return grammar 

2990 
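# A self-contained sketch (file name and line number are placeholders):
# a docstring with a '|' continuation yields one tuple per alternative,
# both attributed to the same rule name.
def _parse_grammar_example():
    doc = """expression : expression PLUS term
                        | term"""
    return parse_grammar(doc, "<example>", 0)
    # -> [('<example>', 1, 'expression', ['expression', 'PLUS', 'term']),
    #     ('<example>', 2, 'expression', ['term'])]
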

2991 

2992# ----------------------------------------------------------------------------- 

2993# ParserReflect() 

2994# 

2995# This class represents information extracted for building a parser including 

2996# start symbol, error function, tokens, precedence list, action functions, 

2997# etc. 

2998# ----------------------------------------------------------------------------- 

2999class ParserReflect(object): 

3000 def __init__(self, pdict, log=None): 

3001 self.pdict = pdict 

3002 self.start = None 

3003 self.error_func = None 

3004 self.tokens = None 

3005 self.modules = set() 

3006 self.grammar = [] 

3007 self.error = False 

3008 

3009 if log is None: 

3010 self.log = PlyLogger(sys.stderr) 

3011 else: 

3012 self.log = log 

3013 

3014 # Get all of the basic information 

3015 def get_all(self): 

3016 self.get_start() 

3017 self.get_error_func() 

3018 self.get_tokens() 

3019 self.get_precedence() 

3020 self.get_pfunctions() 

3021 

3022 # Validate all of the information 

3023 def validate_all(self): 

3024 self.validate_start() 

3025 self.validate_error_func() 

3026 self.validate_tokens() 

3027 self.validate_precedence() 

3028 self.validate_pfunctions() 

3029 self.validate_modules() 

3030 return self.error 

3031 

3032 # Compute a signature over the grammar 

3033 def signature(self): 

3034 parts = [] 

3035 try: 

3036 if self.start: 

3037 parts.append(self.start) 

3038 if self.prec: 

3039 parts.append("".join(["".join(p) for p in self.prec])) 

3040 if self.tokens: 

3041 parts.append(" ".join(self.tokens)) 

3042 for f in self.pfuncs: 

3043 if f[3]: 

3044 parts.append(f[3]) 

3045 except (TypeError, ValueError): 

3046 pass 

3047 return "".join(parts) 

3048 

3049 # ----------------------------------------------------------------------------- 

3050 # validate_modules() 

3051 # 

3052 # This method checks to see if there are duplicated p_rulename() functions 

3053 # in the parser module file. Without this function, it is really easy for 

3054 # users to make mistakes by cutting and pasting code fragments (and it's a real 

3055 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 

3056 # we just do a little regular expression pattern matching of def statements 

3057 # to try and detect duplicates. 

3058 # ----------------------------------------------------------------------------- 

3059 

3060 def validate_modules(self): 

3061 # Match def p_funcname( 

3062 fre = re.compile(r"\s*def\s+(p_[a-zA-Z_0-9]*)\(") 

3063 

3064 for module in self.modules: 

3065 try: 

3066 lines, linen = inspect.getsourcelines(module) 

3067 except IOError: 

3068 continue 

3069 

3070 counthash = {} 

3071 for linen, line in enumerate(lines): 

3072 linen += 1 

3073 m = fre.match(line) 

3074 if m: 

3075 name = m.group(1) 

3076 prev = counthash.get(name) 

3077 if not prev: 

3078 counthash[name] = linen 

3079 else: 

3080 filename = inspect.getsourcefile(module) 

3081 self.log.warning( 

3082 "%s:%d: Function %s redefined. Previously defined on line %d", 

3083 filename, 

3084 linen, 

3085 name, 

3086 prev, 

3087 ) 

3088 

3089 # Get the start symbol 

3090 def get_start(self): 

3091 self.start = self.pdict.get("start") 

3092 

3093 # Validate the start symbol 

3094 def validate_start(self): 

3095 if self.start is not None: 

3096 if not isinstance(self.start, string_types): 

3097 self.log.error("'start' must be a string") 

3098 

3099 # Look for error handler 

3100 def get_error_func(self): 

3101 self.error_func = self.pdict.get("p_error") 

3102 

3103 # Validate the error function 

3104 def validate_error_func(self): 

3105 if self.error_func: 

3106 if isinstance(self.error_func, types.FunctionType): 

3107 ismethod = 0 

3108 elif isinstance(self.error_func, types.MethodType): 

3109 ismethod = 1 

3110 else: 

3111 self.log.error("'p_error' defined, but is not a function or method") 

3112 self.error = True 

3113 return 

3114 

3115 eline = self.error_func.__code__.co_firstlineno 

3116 efile = self.error_func.__code__.co_filename 

3117 module = inspect.getmodule(self.error_func) 

3118 self.modules.add(module) 

3119 

3120 argcount = self.error_func.__code__.co_argcount - ismethod 

3121 if argcount != 1: 

3122 self.log.error("%s:%d: p_error() requires 1 argument", efile, eline) 

3123 self.error = True 

3124 

3125 # Get the tokens map 

3126 def get_tokens(self): 

3127 tokens = self.pdict.get("tokens") 

3128 if not tokens: 

3129 self.log.error("No token list is defined") 

3130 self.error = True 

3131 return 

3132 

3133 if not isinstance(tokens, (list, tuple)): 

3134 self.log.error("tokens must be a list or tuple") 

3135 self.error = True 

3136 return 

3137 

3138 if not tokens: 

3139 self.log.error("tokens is empty") 

3140 self.error = True 

3141 return 

3142 

3143 self.tokens = sorted(tokens) 

3144 

3145 # Validate the tokens 

3146 def validate_tokens(self): 

3147 # 'error' is reserved for error recovery and may not be a token name. 

3148 if "error" in self.tokens: 

3149 self.log.error("Illegal token name 'error'. Is a reserved word") 

3150 self.error = True 

3151 return 

3152 

3153 terminals = set() 

3154 for n in self.tokens: 

3155 if n in terminals: 

3156 self.log.warning("Token %r multiply defined", n) 

3157 terminals.add(n) 

3158 

3159 # Get the precedence map (if any) 

3160 def get_precedence(self): 

3161 self.prec = self.pdict.get("precedence") 

3162 

3163 # Validate and parse the precedence map 

3164 def validate_precedence(self): 

3165 preclist = [] 

3166 if self.prec: 

3167 if not isinstance(self.prec, (list, tuple)): 

3168 self.log.error("precedence must be a list or tuple") 

3169 self.error = True 

3170 return 

3171 for level, p in enumerate(self.prec): 

3172 if not isinstance(p, (list, tuple)): 

3173 self.log.error("Bad precedence table") 

3174 self.error = True 

3175 return 

3176 

3177 if len(p) < 2: 

3178 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)", p) 

3179 self.error = True 

3180 return 

3181 assoc = p[0] 

3182 if not isinstance(assoc, string_types): 

3183 self.log.error("precedence associativity must be a string") 

3184 self.error = True 

3185 return 

3186 for term in p[1:]: 

3187 if not isinstance(term, string_types): 

3188 self.log.error("precedence items must be strings") 

3189 self.error = True 

3190 return 

3191 preclist.append((term, assoc, level + 1)) 

3192 self.preclist = preclist 

3193 
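# A minimal sketch (hypothetical table): the user-supplied
#
#     precedence = (('left', 'PLUS', 'MINUS'), ('left', 'TIMES'))
#
# is flattened here into [('PLUS', 'left', 1), ('MINUS', 'left', 1),
# ('TIMES', 'left', 2)] -- one (term, assoc, level) triple per token,
# with levels counted from 1.
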

3194 # Get all p_functions from the grammar 

3195 def get_pfunctions(self): 

3196 p_functions = [] 

3197 for name, item in self.pdict.items(): 

3198 if not name.startswith("p_") or name == "p_error": 

3199 continue 

3200 if isinstance(item, (types.FunctionType, types.MethodType)): 

3201 line = getattr(item, "co_firstlineno", item.__code__.co_firstlineno) 

3202 module = inspect.getmodule(item) 

3203 p_functions.append((line, module, name, item.__doc__)) 

3204 

3205 # Sort all of the actions by line number; make sure to stringify 

3206 # modules to make them sortable, since `line` may not uniquely sort all 

3207 # p functions 

3208 p_functions.sort( 

3209 key=lambda p_function: (p_function[0], str(p_function[1]), p_function[2], p_function[3]) 

3210 ) 

3211 self.pfuncs = p_functions 

3212 

3213 # Validate all of the p_functions 

3214 def validate_pfunctions(self): 

3215 grammar = [] 

3216 # Check for non-empty symbols 

3217 if len(self.pfuncs) == 0: 

3218 self.log.error("no rules of the form p_rulename are defined") 

3219 self.error = True 

3220 return 

3221 

3222 for line, module, name, doc in self.pfuncs: 

3223 file = inspect.getsourcefile(module) 

3224 func = self.pdict[name] 

3225 if isinstance(func, types.MethodType): 

3226 reqargs = 2 

3227 else: 

3228 reqargs = 1 

3229 if func.__code__.co_argcount > reqargs: 

3230 self.log.error("%s:%d: Rule %r has too many arguments", file, line, func.__name__) 

3231 self.error = True 

3232 elif func.__code__.co_argcount < reqargs: 

3233 self.log.error("%s:%d: Rule %r requires an argument", file, line, func.__name__) 

3234 self.error = True 

3235 elif not func.__doc__: 

3236 self.log.warning( 

3237 "%s:%d: No documentation string specified in function %r (ignored)", 

3238 file, 

3239 line, 

3240 func.__name__, 

3241 ) 

3242 else: 

3243 try: 

3244 parsed_g = parse_grammar(doc, file, line) 

3245 for g in parsed_g: 

3246 grammar.append((name, g)) 

3247 except SyntaxError as e: 

3248 self.log.error(str(e)) 

3249 self.error = True 

3250 

3251 # Looks like a valid grammar rule 

3252 # Record the module in which it is defined. 

3253 self.modules.add(module) 

3254 

3255 # Secondary validation step that looks for p_ definitions that are not functions 

3256 # or functions that look like they might be grammar rules. 

3257 

3258 for n, v in self.pdict.items(): 

3259 if n.startswith("p_") and isinstance(v, (types.FunctionType, types.MethodType)): 

3260 continue 

3261 if n.startswith("t_"): 

3262 continue 

3263 if n.startswith("p_") and n != "p_error": 

3264 self.log.warning("%r not defined as a function", n) 

3265 if (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or ( 

3266 isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2 

3267 ): 

3268 if v.__doc__: 

3269 try: 

3270 doc = v.__doc__.split(" ") 

3271 if doc[1] == ":": 

3272 self.log.warning( 

3273 "%s:%d: Possible grammar rule %r defined without p_ prefix", 

3274 v.__code__.co_filename, 

3275 v.__code__.co_firstlineno, 

3276 n, 

3277 ) 

3278 except IndexError: 

3279 pass 

3280 

3281 self.grammar = grammar 

3282 

3283 

3284# ----------------------------------------------------------------------------- 

3285# yacc(module) 

3286# 

3287# Build a parser 

3288# ----------------------------------------------------------------------------- 

3289 

3290 

3291def yacc( 

3292 method="LALR", 

3293 debug=yaccdebug, 

3294 module=None, 

3295 tabmodule=tab_module, 

3296 start=None, 

3297 check_recursion=True, 

3298 optimize=False, 

3299 write_tables=True, 

3300 debugfile=debug_file, 

3301 outputdir=None, 

3302 debuglog=None, 

3303 errorlog=None, 

3304 picklefile=None, 

3305): 

3306 

3307 if tabmodule is None: 

3308 tabmodule = tab_module 

3309 

3310 # Reference to the parsing method of the last built parser 

3311 global parse 

3312 

3313 # If pickling is enabled, table files are not created 

3314 if picklefile: 

3315 write_tables = 0 

3316 

3317 if errorlog is None: 

3318 errorlog = PlyLogger(sys.stderr) 

3319 

3320 # Get the module dictionary used for the parser 

3321 if module: 

3322 _items = [(k, getattr(module, k)) for k in dir(module)] 

3323 pdict = dict(_items) 

3324 # If no __file__ or __package__ attributes are available, try to obtain them 

3325 # from the __module__ instead 

3326 if "__file__" not in pdict: 

3327 pdict["__file__"] = sys.modules[pdict["__module__"]].__file__ 

3328 if "__package__" not in pdict and "__module__" in pdict: 

3329 if hasattr(sys.modules[pdict["__module__"]], "__package__"): 

3330 pdict["__package__"] = sys.modules[pdict["__module__"]].__package__ 

3331 else: 

3332 pdict = get_caller_module_dict(2) 

3333 

3334 if outputdir is None: 

3335 # If no output directory is set, the location of the output files 

3336 # is determined according to the following rules: 

3337 # - If tabmodule specifies a package, files go into that package directory 

3338 # - Otherwise, files go in the same directory as the specifying module 

3339 if isinstance(tabmodule, types.ModuleType): 

3340 srcfile = tabmodule.__file__ 

3341 else: 

3342 if "." not in tabmodule: 

3343 srcfile = pdict["__file__"] 

3344 else: 

3345 parts = tabmodule.split(".") 

3346 pkgname = ".".join(parts[:-1]) 

3347 exec("import %s" % pkgname) 

3348 srcfile = getattr(sys.modules[pkgname], "__file__", "") 

3349 outputdir = os.path.dirname(srcfile) 

3350 

3351 # Determine whether the module is part of a package or not. 

3352 # If so, fix the tabmodule setting so that tables load correctly 

3353 pkg = pdict.get("__package__") 

3354 if pkg and isinstance(tabmodule, str): 

3355 if "." not in tabmodule: 

3356 tabmodule = pkg + "." + tabmodule 

3357 

3358 # Set start symbol if it's specified directly using an argument 

3359 if start is not None: 

3360 pdict["start"] = start 

3361 

3362 # Collect parser information from the dictionary 

3363 pinfo = ParserReflect(pdict, log=errorlog) 

3364 pinfo.get_all() 

3365 

3366 if pinfo.error: 

3367 raise YaccError("Unable to build parser") 

3368 

3369 # Check signature against table files (if any) 

3370 signature = pinfo.signature() 

3371 

3372 # Read the tables 

3373 try: 

3374 lr = LRTable() 

3375 if picklefile: 

3376 read_signature = lr.read_pickle(picklefile) 

3377 else: 

3378 read_signature = lr.read_table(tabmodule) 

3379 if optimize or (read_signature == signature): 

3380 try: 

3381 lr.bind_callables(pinfo.pdict) 

3382 parser = LRParser(lr, pinfo.error_func) 

3383 parse = parser.parse 

3384 return parser 

3385 except Exception as e: 

3386 errorlog.warning("There was a problem loading the table file: %r", e) 

3387 except VersionError as e: 

3388 errorlog.warning(str(e)) 

3389 except ImportError: 

3390 pass 

3391 
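# Everything above is the cached fast path: if the stored tables carry the
# same signature (a fingerprint of the tokens, precedence, start symbol, and
# rules) as the current grammar, the parser is returned with no table
# construction at all. Reaching this point means the cache was missing,
# stale, or unreadable, so the tables are rebuilt from scratch below.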

3392 if debuglog is None: 

3393 if debug: 

3394 try: 

3395 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), "w")) 

3396 except IOError as e: 

3397 errorlog.warning("Couldn't open %r. %s", debugfile, e) 

3398 debuglog = NullLogger() 

3399 else: 

3400 debuglog = NullLogger() 

3401 

3402 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) 

3403 

3404 errors = False 

3405 

3406 # Validate the parser information 

3407 if pinfo.validate_all(): 

3408 raise YaccError("Unable to build parser") 

3409 

3410 if not pinfo.error_func: 

3411 errorlog.warning("no p_error() function is defined") 

3412 

3413 # Create a grammar object 

3414 grammar = Grammar(pinfo.tokens) 

3415 

3416 # Set precedence level for terminals 

3417 for term, assoc, level in pinfo.preclist: 

3418 try: 

3419 grammar.set_precedence(term, assoc, level) 

3420 except GrammarError as e: 

3421 errorlog.warning("%s", e) 

3422 
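# Hedged example: pinfo.preclist is derived from a declaration such as
#     precedence = (("left", "PLUS", "MINUS"), ("left", "TIMES", "DIVIDE"))
# which produces calls like grammar.set_precedence("PLUS", "left", 1) and
# grammar.set_precedence("TIMES", "left", 2) -- the level rises with the
# row, so later rows bind more tightly.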

3423 # Add productions to the grammar 

3424 for funcname, gram in pinfo.grammar: 

3425 file, line, prodname, syms = gram 

3426 try: 

3427 grammar.add_production(prodname, syms, funcname, file, line) 

3428 except GrammarError as e: 

3429 errorlog.error("%s", e) 

3430 errors = True 

3431 
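# Hedged example: a rule function whose docstring reads
#     "expression : expression PLUS term"
# arrives here as prodname="expression" and syms=["expression", "PLUS", "term"],
# along with the file and line where the function was defined.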

3432 # Set the grammar start symbols 

3433 try: 

3434 if start is None: 

3435 grammar.set_start(pinfo.start) 

3436 else: 

3437 grammar.set_start(start) 

3438 except GrammarError as e: 

3439 errorlog.error(str(e)) 

3440 errors = True 

3441 

3442 if errors: 

3443 raise YaccError("Unable to build parser") 

3444 

3445 # Verify the grammar structure 

3446 undefined_symbols = grammar.undefined_symbols() 

3447 for sym, prod in undefined_symbols: 

3448 errorlog.error( 

3449 "%s:%d: Symbol %r used, but not defined as a token or a rule", prod.file, prod.line, sym 

3450 ) 

3451 errors = True 

3452 

3453 unused_terminals = grammar.unused_terminals() 

3454 if unused_terminals: 

3455 debuglog.info("") 

3456 debuglog.info("Unused terminals:") 

3457 debuglog.info("") 

3458 for term in unused_terminals: 

3459 errorlog.warning("Token %r defined, but not used", term) 

3460 debuglog.info(" %s", term) 

3461 

3462 # Print out all productions to the debug log 

3463 if debug: 

3464 debuglog.info("") 

3465 debuglog.info("Grammar") 

3466 debuglog.info("") 

3467 for n, p in enumerate(grammar.Productions): 

3468 debuglog.info("Rule %-5d %s", n, p) 

3469 

3470 # Find unused non-terminals 

3471 unused_rules = grammar.unused_rules() 

3472 for prod in unused_rules: 

3473 errorlog.warning("%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name) 

3474 

3475 if len(unused_terminals) == 1: 

3476 errorlog.warning("There is 1 unused token") 

3477 elif len(unused_terminals) > 1: 

3478 errorlog.warning("There are %d unused tokens", len(unused_terminals)) 

3479 

3480 if len(unused_rules) == 1: 

3481 errorlog.warning("There is 1 unused rule") 

3482 elif len(unused_rules) > 1: 

3483 errorlog.warning("There are %d unused rules", len(unused_rules)) 

3484 

3485 if debug: 

3486 debuglog.info("") 

3487 debuglog.info("Terminals, with rules where they appear") 

3488 debuglog.info("") 

3489 terms = list(grammar.Terminals) 

3490 terms.sort() 

3491 for term in terms: 

3492 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) 

3493 

3494 debuglog.info("") 

3495 debuglog.info("Nonterminals, with rules where they appear") 

3496 debuglog.info("") 

3497 nonterms = list(grammar.Nonterminals) 

3498 nonterms.sort() 

3499 for nonterm in nonterms: 

3500 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) 

3501 debuglog.info("") 

3502 

3503 if check_recursion: 

3504 unreachable = grammar.find_unreachable() 

3505 for u in unreachable: 

3506 errorlog.warning("Symbol %r is unreachable", u) 

3507 

3508 infinite = grammar.infinite_cycles() 

3509 for inf in infinite: 

3510 errorlog.error("Infinite recursion detected for symbol %r", inf) 

3511 errors = True 

3512 
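# Hedged example: a nonterminal that can never derive a string of terminals,
# say one defined only by the rule
#     a : a PLUS a
# (no terminating alternative), is what triggers the infinite-recursion
# error above.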

3513 unused_prec = grammar.unused_precedence() 

3514 for term, assoc in unused_prec: 

3515 errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term) 

3516 errors = True 

3517 

3518 if errors: 

3519 raise YaccError("Unable to build parser") 

3520 

3521 # Run the LRGeneratedTable on the grammar 

3522 if debug: 

3523 errorlog.debug("Generating %s tables", method) 

3524 

3525 lr = LRGeneratedTable(grammar, method, debuglog) 

3526 
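# The call above is the expensive step: LRGeneratedTable computes the item
# sets, lookaheads, and action/goto tables for the chosen method ("LALR" or
# "SLR"), recording every conflict it had to resolve along the way.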

3527 if debug: 

3528 num_sr = len(lr.sr_conflicts) 

3529 

3530 # Report shift/reduce and reduce/reduce conflicts 

3531 if num_sr == 1: 

3532 errorlog.warning("1 shift/reduce conflict") 

3533 elif num_sr > 1: 

3534 errorlog.warning("%d shift/reduce conflicts", num_sr) 

3535 

3536 num_rr = len(lr.rr_conflicts) 

3537 if num_rr == 1: 

3538 errorlog.warning("1 reduce/reduce conflict") 

3539 elif num_rr > 1: 

3540 errorlog.warning("%d reduce/reduce conflicts", num_rr) 

3541 

3542 # Write out conflicts to the output file 

3543 if debug and (lr.sr_conflicts or lr.rr_conflicts): 

3544 debuglog.warning("") 

3545 debuglog.warning("Conflicts:") 

3546 debuglog.warning("") 

3547 

3548 for state, tok, resolution in lr.sr_conflicts: 

3549 debuglog.warning( 

3550 "shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution 

3551 ) 

3552 

3553 already_reported = set() 

3554 for state, rule, rejected in lr.rr_conflicts: 

3555 if (state, id(rule), id(rejected)) in already_reported: 

3556 continue 

3557 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 

3558 debuglog.warning("rejected rule (%s) in state %d", rejected, state) 

3559 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 

3560 errorlog.warning("rejected rule (%s) in state %d", rejected, state) 

3561 already_reported.add((state, id(rule), id(rejected))) 

3562 

3563 warned_never = [] 

3564 for state, rule, rejected in lr.rr_conflicts: 

3565 if not rejected.reduced and (rejected not in warned_never): 

3566 debuglog.warning("Rule (%s) is never reduced", rejected) 

3567 errorlog.warning("Rule (%s) is never reduced", rejected) 

3568 warned_never.append(rejected) 

3569 
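# Hedged example: an ambiguous rule such as
#     expression : expression PLUS expression
# declared without precedence yields shift/reduce conflicts; PLY resolves
# those in favor of shifting by default, and the log entries above tell the
# grammar author where adding precedence declarations would help.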

3570 # Write the table file if requested 

3571 if write_tables: 

3572 try: 

3573 lr.write_table(tabmodule, outputdir, signature) 

3574 if tabmodule in sys.modules: 

3575 del sys.modules[tabmodule] 

3576 except IOError as e: 

3577 errorlog.warning("Couldn't create %r. %s", tabmodule, e) 

3578 

3579 # Write a pickled version of the tables 

3580 if picklefile: 

3581 try: 

3582 lr.pickle_table(picklefile, signature) 

3583 except IOError as e: 

3584 errorlog.warning("Couldn't create %r. %s", picklefile, e) 

3585 

3586 # Build the parser 

3587 lr.bind_callables(pinfo.pdict) 

3588 parser = LRParser(lr, pinfo.error_func) 

3589 

3590 parse = parser.parse 

3591 return parser
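# Hedged usage note: besides the returned LRParser, the module-level "parse"
# rebound above enables the shorthand
#     import ply.yacc as yacc
#     yacc.yacc()          # build once
#     yacc.parse(data)     # parse with the most recently built parser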