Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/ply/yacc.py: 5%

1921 statements  

coverage.py v6.5.0, created at 2022-12-01 19:55 +0000

1# ----------------------------------------------------------------------------- 

2# ply: yacc.py 

3# 

4# Copyright (C) 2001-2018 

5# David M. Beazley (Dabeaz LLC) 

6# All rights reserved. 

7# 

8# Redistribution and use in source and binary forms, with or without 

9# modification, are permitted provided that the following conditions are 

10# met: 

11# 

12# * Redistributions of source code must retain the above copyright notice, 

13# this list of conditions and the following disclaimer. 

14# * Redistributions in binary form must reproduce the above copyright notice, 

15# this list of conditions and the following disclaimer in the documentation 

16# and/or other materials provided with the distribution. 

17# * Neither the name of the David Beazley or Dabeaz LLC may be used to 

18# endorse or promote products derived from this software without 

19# specific prior written permission. 

20# 

21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

32# ----------------------------------------------------------------------------- 

33# 

34# This implements an LR parser that is constructed from grammar rules defined 

35# as Python functions. The grammar is specified by supplying the BNF inside 

36# Python documentation strings. The inspiration for this technique was borrowed 

37# from John Aycock's Spark parsing system. PLY might be viewed as a cross between 

38# Spark and the GNU bison utility. 

39# 

40# The current implementation is only somewhat object-oriented. The 

41# LR parser itself is defined in terms of an object (which allows multiple 

42# parsers to co-exist). However, most of the variables used during table 

43# construction are defined in terms of global variables. Users shouldn't 

44# notice unless they are trying to define multiple parsers at the same 

45# time using threads (in which case they should have their head examined). 

46# 

47# This implementation supports both SLR and LALR(1) parsing. LALR(1) 

48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 

49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 

50# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced 

51# by the more efficient DeRemer and Pennello algorithm. 

52# 

53# :::::::: WARNING ::::::: 

54# 

55# Construction of LR parsing tables is fairly complicated and expensive. 

56# To make this module run fast, a *LOT* of work has been put into 

57# optimization---often at the expense of readability and what might be 

58# considered good Python "coding style." Modify the code at your 

59# own risk! 

60# ---------------------------------------------------------------------------- 
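#
# Example: a grammar is supplied as Python functions whose docstrings contain
# the BNF. An illustrative sketch only (the p_* functions, token names, and
# the 'calclex' lexer module are hypothetical, not part of this file):
#
#     import ply.yacc as yacc
#     from calclex import tokens        # hypothetical lexer module
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expr_term(p):
#         'expr : term'
#         p[0] = p[1]
#
#     def p_term_number(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     parser = yacc.yacc()
#     result = parser.parse('1 + 2')
# -----------------------------------------------------------------------------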

61 

62import re 

63import types 

64import sys 

65import os.path 

66import inspect 

67import warnings 

68 

69__version__ = '3.11' 

70__tabversion__ = '3.10' 

71 

72#----------------------------------------------------------------------------- 

73# === User configurable parameters === 

74# 

75# Change these to modify the default behavior of yacc (if you wish) 

76#----------------------------------------------------------------------------- 

77 

78yaccdebug = True # Debugging mode. If set, yacc generates a 

79 # 'parser.out' file in the current directory 

80 

81debug_file = 'parser.out' # Default name of the debugging file 

82tab_module = 'parsetab' # Default name of the table module 

83default_lr = 'LALR' # Default LR table generation method 

84 

85error_count = 3 # Number of symbols that must be shifted to leave recovery mode 

86 

87yaccdevel = False # Set to True if developing yacc. This turns off optimized 

88 # implementations of certain functions. 

89 

90resultlimit = 40 # Size limit of results when running in debug mode. 

91 

92pickle_protocol = 0 # Protocol to use when writing pickle files 

93 

94# String type-checking compatibility 

95if sys.version_info[0] < 3: 

96 string_types = basestring 

97else: 

98 string_types = str 

99 

100MAXINT = sys.maxsize 

101 

102# This object is a stand-in for a logging object created by the 

103# logging module. PLY will use this by default to create things 

104# such as the parser.out file. If a user wants more detailed 

105# information, they can create their own logging object and pass 

106# it into PLY. 

107 

108class PlyLogger(object): 

109 def __init__(self, f): 

110 self.f = f 

111 

112 def debug(self, msg, *args, **kwargs): 

113 self.f.write((msg % args) + '\n') 

114 

115 info = debug 

116 

117 def warning(self, msg, *args, **kwargs): 

118 self.f.write('WARNING: ' + (msg % args) + '\n') 

119 

120 def error(self, msg, *args, **kwargs): 

121 self.f.write('ERROR: ' + (msg % args) + '\n') 

122 

123 critical = debug 

124 
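# Because PLY only calls the debug/info/warning/error/critical methods, a
# standard logging.Logger works as a drop-in replacement for PlyLogger. A
# sketch, assuming the yacc() entry point defined later in this module:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)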

125# Null logger is used when no output is generated. Does nothing. 

126class NullLogger(object): 

127 def __getattribute__(self, name): 

128 return self 

129 

130 def __call__(self, *args, **kwargs): 

131 return self 

132 

133# Exception raised for yacc-related errors 

134class YaccError(Exception): 

135 pass 

136 

137# Format the result message that the parser produces when running in debug mode. 

138def format_result(r): 

139 repr_str = repr(r) 

140 if '\n' in repr_str: 

141 repr_str = repr(repr_str) 

142 if len(repr_str) > resultlimit: 

143 repr_str = repr_str[:resultlimit] + ' ...' 

144 result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) 

145 return result 

146 

147# Format stack entries when the parser is running in debug mode 

148def format_stack_entry(r): 

149 repr_str = repr(r) 

150 if '\n' in repr_str: 

151 repr_str = repr(repr_str) 

152 if len(repr_str) < 16: 

153 return repr_str 

154 else: 

155 return '<%s @ 0x%x>' % (type(r).__name__, id(r)) 

156 

157# Panic mode error recovery support. This feature is being reworked--much of the 

158# code here is to offer a deprecation/backwards compatible transition 

159 

160_errok = None 

161_token = None 

162_restart = None 

163_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). 

164Instead, invoke the methods on the associated parser instance: 

165 

166 def p_error(p): 

167 ... 

168 # Use parser.errok(), parser.token(), parser.restart() 

169 ... 

170 

171 parser = yacc.yacc() 

172''' 

173 

174def errok(): 

175 warnings.warn(_warnmsg) 

176 return _errok() 

177 

178def restart(): 

179 warnings.warn(_warnmsg) 

180 return _restart() 

181 

182def token(): 

183 warnings.warn(_warnmsg) 

184 return _token() 

185 

186# Utility function to call the p_error() function with some deprecation hacks 

187def call_errorfunc(errorfunc, token, parser): 

188 global _errok, _token, _restart 

189 _errok = parser.errok 

190 _token = parser.token 

191 _restart = parser.restart 

192 r = errorfunc(token) 

193 try: 

194 del _errok, _token, _restart 

195 except NameError: 

196 pass 

197 return r 

198 

199#----------------------------------------------------------------------------- 

200# === LR Parsing Engine === 

201# 

202# The following classes are used for the LR parser itself. These are not 

203# used during table construction and are independent of the actual LR 

204# table generation algorithm 

205#----------------------------------------------------------------------------- 

206 

207# This class is used to hold non-terminal grammar symbols during parsing. 

208# It normally has the following attributes set: 

209# .type = Grammar symbol type 

210# .value = Symbol value 

211# .lineno = Starting line number 

212# .endlineno = Ending line number (optional, set automatically) 

213# .lexpos = Starting lex position 

214# .endlexpos = Ending lex position (optional, set automatically) 

215 

216class YaccSymbol: 

217 def __str__(self): 

218 return self.type 

219 

220 def __repr__(self): 

221 return str(self) 

222 

223# This class is a wrapper around the objects actually passed to each 

224# grammar rule. Index lookup and assignment actually assign the 

225# .value attribute of the underlying YaccSymbol object. 

226# The lineno() method returns the line number of a given 

227# item (or 0 if not defined). The linespan() method returns 

228# a tuple of (startline,endline) representing the range of lines 

229# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 

230# representing the range of positional information for a symbol. 

231 

232class YaccProduction: 

233 def __init__(self, s, stack=None): 

234 self.slice = s 

235 self.stack = stack 

236 self.lexer = None 

237 self.parser = None 

238 

239 def __getitem__(self, n): 

240 if isinstance(n, slice): 

241 return [s.value for s in self.slice[n]] 

242 elif n >= 0: 

243 return self.slice[n].value 

244 else: 

245 return self.stack[n].value 

246 

247 def __setitem__(self, n, v): 

248 self.slice[n].value = v 

249 

250 def __getslice__(self, i, j): 

251 return [s.value for s in self.slice[i:j]] 

252 

253 def __len__(self): 

254 return len(self.slice) 

255 

256 def lineno(self, n): 

257 return getattr(self.slice[n], 'lineno', 0) 

258 

259 def set_lineno(self, n, lineno): 

260 self.slice[n].lineno = lineno 

261 

262 def linespan(self, n): 

263 startline = getattr(self.slice[n], 'lineno', 0) 

264 endline = getattr(self.slice[n], 'endlineno', startline) 

265 return startline, endline 

266 

267 def lexpos(self, n): 

268 return getattr(self.slice[n], 'lexpos', 0) 

269 

270 def set_lexpos(self, n, lexpos): 

271 self.slice[n].lexpos = lexpos 

272 

273 def lexspan(self, n): 

274 startpos = getattr(self.slice[n], 'lexpos', 0) 

275 endpos = getattr(self.slice[n], 'endlexpos', startpos) 

276 return startpos, endpos 

277 

278 def error(self): 

279 raise SyntaxError 

280 
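# Inside a grammar rule, the p argument is one of these YaccProduction
# objects. A hypothetical rule showing the accessors (the rule and token
# names are invented for illustration):
#
#     def p_binary(p):
#         'expr : expr PLUS expr'
#         p[0] = p[1] + p[3]                   # index 0 is the result slot
#         startline, endline = p.linespan(3)   # line range of the right expr
#         startpos, endpos = p.lexspan(1)      # lexpos range of the left expr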

281# ----------------------------------------------------------------------------- 

282# == LRParser == 

283# 

284# The LR Parsing engine. 

285# ----------------------------------------------------------------------------- 

286 

287class LRParser: 

288 def __init__(self, lrtab, errorf): 

289 self.productions = lrtab.lr_productions 

290 self.action = lrtab.lr_action 

291 self.goto = lrtab.lr_goto 

292 self.errorfunc = errorf 

293 self.set_defaulted_states() 

294 self.errorok = True 

295 

296 def errok(self): 

297 self.errorok = True 

298 

299 def restart(self): 

300 del self.statestack[:] 

301 del self.symstack[:] 

302 sym = YaccSymbol() 

303 sym.type = '$end' 

304 self.symstack.append(sym) 

305 self.statestack.append(0) 

306 

307 # Defaulted state support. 

308 # This method identifies parser states where there is only one possible reduction action. 

309 # For such states, the parser can choose to make a rule reduction without consuming 

310 # the next look-ahead token. This delayed invocation of the tokenizer can be useful in 

311 # certain kinds of advanced parsing situations where the lexer and parser interact with 

312 # each other or change states (i.e., manipulation of scope, lexer states, etc.). 

313 # 

314 # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions 

315 def set_defaulted_states(self): 

316 self.defaulted_states = {} 

317 for state, actions in self.action.items(): 

318 rules = list(actions.values()) 

319 if len(rules) == 1 and rules[0] < 0: 

320 self.defaulted_states[state] = rules[0] 

321 

322 def disable_defaulted_states(self): 

323 self.defaulted_states = {} 

324 
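    # For instance, if a state's action table entry holds exactly one possible
    # action and it is a reduction, e.g. actions[12] == {'NUMBER': -3}, then
    # set_defaulted_states() records defaulted_states[12] = -3 and the parser
    # reduces by rule 3 in state 12 without fetching the next token. (The
    # state and rule numbers here are invented for illustration.)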

325 def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

326 if debug or yaccdevel: 

327 if isinstance(debug, int): 

328 debug = PlyLogger(sys.stderr) 

329 return self.parsedebug(input, lexer, debug, tracking, tokenfunc) 

330 elif tracking: 

331 return self.parseopt(input, lexer, debug, tracking, tokenfunc) 

332 else: 

333 return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) 

334 
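    # How the dispatch above plays out, given a parser built by the yacc()
    # function later in this module:
    #
    #     parser.parse(data)                 # fastest path: parseopt_notrack()
    #     parser.parse(data, tracking=True)  # parseopt(), keeps position spans
    #     parser.parse(data, debug=True)     # parsedebug(), logging to stderr
    #     parser.parse(data, debug=PlyLogger(sys.stderr))  # explicit log object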

335 

336 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

337 # parsedebug(). 

338 # 

339 # This is the debugging enabled version of parse(). All changes made to the 

340 # parsing engine should be made here. Optimized versions of this function 

341 # are automatically created by the ply/ygen.py script. This script cuts out 

342 # sections enclosed in markers such as this: 

343 # 

344 # #--! DEBUG 

345 # statements 

346 # #--! DEBUG 

347 # 

348 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

349 

350 def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

351 #--! parsedebug-start 

352 lookahead = None # Current lookahead symbol 

353 lookaheadstack = [] # Stack of lookahead symbols 

354 actions = self.action # Local reference to action table (to avoid lookup on self.) 

355 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

356 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

357 defaulted_states = self.defaulted_states # Local reference to defaulted states 

358 pslice = YaccProduction(None) # Production object passed to grammar rules 

359 errorcount = 0 # Used during error recovery 

360 

361 #--! DEBUG 

362 debug.info('PLY: PARSE DEBUG START') 

363 #--! DEBUG 

364 

365 # If no lexer was given, we will try to use the lex module 

366 if not lexer: 

367 from . import lex 

368 lexer = lex.lexer 

369 

370 # Set up the lexer and parser objects on pslice 

371 pslice.lexer = lexer 

372 pslice.parser = self 

373 

374 # If input was supplied, pass to lexer 

375 if input is not None: 

376 lexer.input(input) 

377 

378 if tokenfunc is None: 

379 # Tokenize function 

380 get_token = lexer.token 

381 else: 

382 get_token = tokenfunc 

383 

384 # Set the parser() token method (sometimes used in error recovery) 

385 self.token = get_token 

386 

387 # Set up the state and symbol stacks 

388 

389 statestack = [] # Stack of parsing states 

390 self.statestack = statestack 

391 symstack = [] # Stack of grammar symbols 

392 self.symstack = symstack 

393 

394 pslice.stack = symstack # Put in the production 

395 errtoken = None # Err token 

396 

397 # The start state is assumed to be (0,$end) 

398 

399 statestack.append(0) 

400 sym = YaccSymbol() 

401 sym.type = '$end' 

402 symstack.append(sym) 

403 state = 0 

404 while True: 

405 # Get the next symbol on the input. If a lookahead symbol 

406 # is already set, we just use that. Otherwise, we'll pull 

407 # the next token off of the lookaheadstack or from the lexer 

408 

409 #--! DEBUG 

410 debug.debug('') 

411 debug.debug('State : %s', state) 

412 #--! DEBUG 

413 

414 if state not in defaulted_states: 

415 if not lookahead: 

416 if not lookaheadstack: 

417 lookahead = get_token() # Get the next token 

418 else: 

419 lookahead = lookaheadstack.pop() 

420 if not lookahead: 

421 lookahead = YaccSymbol() 

422 lookahead.type = '$end' 

423 

424 # Check the action table 

425 ltype = lookahead.type 

426 t = actions[state].get(ltype) 

427 else: 

428 t = defaulted_states[state] 

429 #--! DEBUG 

430 debug.debug('Defaulted state %s: Reduce using %d', state, -t) 

431 #--! DEBUG 

432 

433 #--! DEBUG 

434 debug.debug('Stack : %s', 

435 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 

436 #--! DEBUG 

437 

438 if t is not None: 

439 if t > 0: 

440 # shift a symbol on the stack 

441 statestack.append(t) 

442 state = t 

443 

444 #--! DEBUG 

445 debug.debug('Action : Shift and goto state %s', t) 

446 #--! DEBUG 

447 

448 symstack.append(lookahead) 

449 lookahead = None 

450 

451 # Decrease error count on successful shift 

452 if errorcount: 

453 errorcount -= 1 

454 continue 

455 

456 if t < 0: 

457 # reduce a symbol on the stack, emit a production 

458 p = prod[-t] 

459 pname = p.name 

460 plen = p.len 

461 

462 # Get production function 

463 sym = YaccSymbol() 

464 sym.type = pname # Production name 

465 sym.value = None 

466 

467 #--! DEBUG 

468 if plen: 

469 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, 

470 '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', 

471 goto[statestack[-1-plen]][pname]) 

472 else: 

473 debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], 

474 goto[statestack[-1]][pname]) 

475 

476 #--! DEBUG 

477 

478 if plen: 

479 targ = symstack[-plen-1:] 

480 targ[0] = sym 

481 

482 #--! TRACKING 

483 if tracking: 

484 t1 = targ[1] 

485 sym.lineno = t1.lineno 

486 sym.lexpos = t1.lexpos 

487 t1 = targ[-1] 

488 sym.endlineno = getattr(t1, 'endlineno', t1.lineno) 

489 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) 

490 #--! TRACKING 

491 

492 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

493 # The code enclosed in this section is duplicated 

494 # below as a performance optimization. Make sure 

495 # changes get made in both locations. 

496 

497 pslice.slice = targ 

498 

499 try: 

500 # Call the grammar rule with our special slice object 

501 del symstack[-plen:] 

502 self.state = state 

503 p.callable(pslice) 

504 del statestack[-plen:] 

505 #--! DEBUG 

506 debug.info('Result : %s', format_result(pslice[0])) 

507 #--! DEBUG 

508 symstack.append(sym) 

509 state = goto[statestack[-1]][pname] 

510 statestack.append(state) 

511 except SyntaxError: 

512 # If an error was set, enter error recovery state 

513 lookaheadstack.append(lookahead) # Save the current lookahead token 

514 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

515 statestack.pop() # Pop back one state (before the reduce) 

516 state = statestack[-1] 

517 sym.type = 'error' 

518 sym.value = 'error' 

519 lookahead = sym 

520 errorcount = error_count 

521 self.errorok = False 

522 

523 continue 

524 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

525 

526 else: 

527 

528 #--! TRACKING 

529 if tracking: 

530 sym.lineno = lexer.lineno 

531 sym.lexpos = lexer.lexpos 

532 #--! TRACKING 

533 

534 targ = [sym] 

535 

536 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

537 # The code enclosed in this section is duplicated 

538 # above as a performance optimization. Make sure 

539 # changes get made in both locations. 

540 

541 pslice.slice = targ 

542 

543 try: 

544 # Call the grammar rule with our special slice object 

545 self.state = state 

546 p.callable(pslice) 

547 #--! DEBUG 

548 debug.info('Result : %s', format_result(pslice[0])) 

549 #--! DEBUG 

550 symstack.append(sym) 

551 state = goto[statestack[-1]][pname] 

552 statestack.append(state) 

553 except SyntaxError: 

554 # If an error was set, enter error recovery state 

555 lookaheadstack.append(lookahead) # Save the current lookahead token 

556 statestack.pop() # Pop back one state (before the reduce) 

557 state = statestack[-1] 

558 sym.type = 'error' 

559 sym.value = 'error' 

560 lookahead = sym 

561 errorcount = error_count 

562 self.errorok = False 

563 

564 continue 

565 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

566 

567 if t == 0: 

568 n = symstack[-1] 

569 result = getattr(n, 'value', None) 

570 #--! DEBUG 

571 debug.info('Done : Returning %s', format_result(result)) 

572 debug.info('PLY: PARSE DEBUG END') 

573 #--! DEBUG 

574 return result 

575 

576 if t is None: 

577 

578 #--! DEBUG 

579 debug.error('Error : %s', 

580 ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 

581 #--! DEBUG 

582 

583 # We have some kind of parsing error here. To handle 

584 # this, we are going to push the current token onto 

585 # the tokenstack and replace it with an 'error' token. 

586 # If there are any synchronization rules, they may 

587 # catch it. 

588 # 

589# In addition to pushing the error token, we call 

590 # the user defined p_error() function if this is the 

591 # first syntax error. This function is only called if 

592 # errorcount == 0. 

593 if errorcount == 0 or self.errorok: 

594 errorcount = error_count 

595 self.errorok = False 

596 errtoken = lookahead 

597 if errtoken.type == '$end': 

598 errtoken = None # End of file! 

599 if self.errorfunc: 

600 if errtoken and not hasattr(errtoken, 'lexer'): 

601 errtoken.lexer = lexer 

602 self.state = state 

603 tok = call_errorfunc(self.errorfunc, errtoken, self) 

604 if self.errorok: 

605 # User must have done some kind of panic 

606 # mode recovery on their own. The 

607 # returned token is the next lookahead 

608 lookahead = tok 

609 errtoken = None 

610 continue 

611 else: 

612 if errtoken: 

613 if hasattr(errtoken, 'lineno'): 

614 lineno = lookahead.lineno 

615 else: 

616 lineno = 0 

617 if lineno: 

618 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 

619 else: 

620 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type) 

621 else: 

622 sys.stderr.write('yacc: Parse error in input. EOF\n') 

623 return 

624 

625 else: 

626 errorcount = error_count 

627 

628 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

629 # entire parse has been rolled back and we're completely hosed. The token is 

630 # discarded and we just keep going. 

631 

632 if len(statestack) <= 1 and lookahead.type != '$end': 

633 lookahead = None 

634 errtoken = None 

635 state = 0 

636 # Nuke the pushback stack 

637 del lookaheadstack[:] 

638 continue 

639 

640 # case 2: the statestack has a couple of entries on it, but we're 

641 # at the end of the file. nuke the top entry and generate an error token 

642 

643 # Start nuking entries on the stack 

644 if lookahead.type == '$end': 

645 # Whoa. We're really hosed here. Bail out 

646 return 

647 

648 if lookahead.type != 'error': 

649 sym = symstack[-1] 

650 if sym.type == 'error': 

651 # Hmmm. Error is on top of stack, we'll just nuke input 

652 # symbol and continue 

653 #--! TRACKING 

654 if tracking: 

655 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) 

656 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) 

657 #--! TRACKING 

658 lookahead = None 

659 continue 

660 

661 # Create the error symbol for the first time and make it the new lookahead symbol 

662 t = YaccSymbol() 

663 t.type = 'error' 

664 

665 if hasattr(lookahead, 'lineno'): 

666 t.lineno = t.endlineno = lookahead.lineno 

667 if hasattr(lookahead, 'lexpos'): 

668 t.lexpos = t.endlexpos = lookahead.lexpos 

669 t.value = lookahead 

670 lookaheadstack.append(lookahead) 

671 lookahead = t 

672 else: 

673 sym = symstack.pop() 

674 #--! TRACKING 

675 if tracking: 

676 lookahead.lineno = sym.lineno 

677 lookahead.lexpos = sym.lexpos 

678 #--! TRACKING 

679 statestack.pop() 

680 state = statestack[-1] 

681 

682 continue 

683 

684 # Call an error function here 

685 raise RuntimeError('yacc: internal parser error!!!\n') 

686 

687 #--! parsedebug-end 

688 

689 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

690 # parseopt(). 

691 # 

692 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! 

693 # This code is automatically generated by the ply/ygen.py script. Make 

694 # changes to the parsedebug() method instead. 

695 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

696 

697 def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

698 #--! parseopt-start 

699 lookahead = None # Current lookahead symbol 

700 lookaheadstack = [] # Stack of lookahead symbols 

701 actions = self.action # Local reference to action table (to avoid lookup on self.) 

702 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

703 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

704 defaulted_states = self.defaulted_states # Local reference to defaulted states 

705 pslice = YaccProduction(None) # Production object passed to grammar rules 

706 errorcount = 0 # Used during error recovery 

707 

708 

709 # If no lexer was given, we will try to use the lex module 

710 if not lexer: 

711 from . import lex 

712 lexer = lex.lexer 

713 

714 # Set up the lexer and parser objects on pslice 

715 pslice.lexer = lexer 

716 pslice.parser = self 

717 

718 # If input was supplied, pass to lexer 

719 if input is not None: 

720 lexer.input(input) 

721 

722 if tokenfunc is None: 

723 # Tokenize function 

724 get_token = lexer.token 

725 else: 

726 get_token = tokenfunc 

727 

728 # Set the parser() token method (sometimes used in error recovery) 

729 self.token = get_token 

730 

731 # Set up the state and symbol stacks 

732 

733 statestack = [] # Stack of parsing states 

734 self.statestack = statestack 

735 symstack = [] # Stack of grammar symbols 

736 self.symstack = symstack 

737 

738 pslice.stack = symstack # Put in the production 

739 errtoken = None # Err token 

740 

741 # The start state is assumed to be (0,$end) 

742 

743 statestack.append(0) 

744 sym = YaccSymbol() 

745 sym.type = '$end' 

746 symstack.append(sym) 

747 state = 0 

748 while True: 

749 # Get the next symbol on the input. If a lookahead symbol 

750 # is already set, we just use that. Otherwise, we'll pull 

751 # the next token off of the lookaheadstack or from the lexer 

752 

753 

754 if state not in defaulted_states: 

755 if not lookahead: 

756 if not lookaheadstack: 

757 lookahead = get_token() # Get the next token 

758 else: 

759 lookahead = lookaheadstack.pop() 

760 if not lookahead: 

761 lookahead = YaccSymbol() 

762 lookahead.type = '$end' 

763 

764 # Check the action table 

765 ltype = lookahead.type 

766 t = actions[state].get(ltype) 

767 else: 

768 t = defaulted_states[state] 

769 

770 

771 if t is not None: 

772 if t > 0: 

773 # shift a symbol on the stack 

774 statestack.append(t) 

775 state = t 

776 

777 

778 symstack.append(lookahead) 

779 lookahead = None 

780 

781 # Decrease error count on successful shift 

782 if errorcount: 

783 errorcount -= 1 

784 continue 

785 

786 if t < 0: 

787 # reduce a symbol on the stack, emit a production 

788 p = prod[-t] 

789 pname = p.name 

790 plen = p.len 

791 

792 # Get production function 

793 sym = YaccSymbol() 

794 sym.type = pname # Production name 

795 sym.value = None 

796 

797 

798 if plen: 

799 targ = symstack[-plen-1:] 

800 targ[0] = sym 

801 

802 #--! TRACKING 

803 if tracking: 

804 t1 = targ[1] 

805 sym.lineno = t1.lineno 

806 sym.lexpos = t1.lexpos 

807 t1 = targ[-1] 

808 sym.endlineno = getattr(t1, 'endlineno', t1.lineno) 

809 sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) 

810 #--! TRACKING 

811 

812 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

813 # The code enclosed in this section is duplicated 

814 # below as a performance optimization. Make sure 

815 # changes get made in both locations. 

816 

817 pslice.slice = targ 

818 

819 try: 

820 # Call the grammar rule with our special slice object 

821 del symstack[-plen:] 

822 self.state = state 

823 p.callable(pslice) 

824 del statestack[-plen:] 

825 symstack.append(sym) 

826 state = goto[statestack[-1]][pname] 

827 statestack.append(state) 

828 except SyntaxError: 

829 # If an error was set, enter error recovery state 

830 lookaheadstack.append(lookahead) # Save the current lookahead token 

831 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

832 statestack.pop() # Pop back one state (before the reduce) 

833 state = statestack[-1] 

834 sym.type = 'error' 

835 sym.value = 'error' 

836 lookahead = sym 

837 errorcount = error_count 

838 self.errorok = False 

839 

840 continue 

841 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

842 

843 else: 

844 

845 #--! TRACKING 

846 if tracking: 

847 sym.lineno = lexer.lineno 

848 sym.lexpos = lexer.lexpos 

849 #--! TRACKING 

850 

851 targ = [sym] 

852 

853 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

854 # The code enclosed in this section is duplicated 

855 # above as a performance optimization. Make sure 

856 # changes get made in both locations. 

857 

858 pslice.slice = targ 

859 

860 try: 

861 # Call the grammar rule with our special slice object 

862 self.state = state 

863 p.callable(pslice) 

864 symstack.append(sym) 

865 state = goto[statestack[-1]][pname] 

866 statestack.append(state) 

867 except SyntaxError: 

868 # If an error was set, enter error recovery state 

869 lookaheadstack.append(lookahead) # Save the current lookahead token 

870 statestack.pop() # Pop back one state (before the reduce) 

871 state = statestack[-1] 

872 sym.type = 'error' 

873 sym.value = 'error' 

874 lookahead = sym 

875 errorcount = error_count 

876 self.errorok = False 

877 

878 continue 

879 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

880 

881 if t == 0: 

882 n = symstack[-1] 

883 result = getattr(n, 'value', None) 

884 return result 

885 

886 if t is None: 

887 

888 

889 # We have some kind of parsing error here. To handle 

890 # this, we are going to push the current token onto 

891 # the tokenstack and replace it with an 'error' token. 

892 # If there are any synchronization rules, they may 

893 # catch it. 

894 # 

895# In addition to pushing the error token, we call 

896 # the user defined p_error() function if this is the 

897 # first syntax error. This function is only called if 

898 # errorcount == 0. 

899 if errorcount == 0 or self.errorok: 

900 errorcount = error_count 

901 self.errorok = False 

902 errtoken = lookahead 

903 if errtoken.type == '$end': 

904 errtoken = None # End of file! 

905 if self.errorfunc: 

906 if errtoken and not hasattr(errtoken, 'lexer'): 

907 errtoken.lexer = lexer 

908 self.state = state 

909 tok = call_errorfunc(self.errorfunc, errtoken, self) 

910 if self.errorok: 

911 # User must have done some kind of panic 

912 # mode recovery on their own. The 

913 # returned token is the next lookahead 

914 lookahead = tok 

915 errtoken = None 

916 continue 

917 else: 

918 if errtoken: 

919 if hasattr(errtoken, 'lineno'): 

920 lineno = lookahead.lineno 

921 else: 

922 lineno = 0 

923 if lineno: 

924 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 

925 else: 

926 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type) 

927 else: 

928 sys.stderr.write('yacc: Parse error in input. EOF\n') 

929 return 

930 

931 else: 

932 errorcount = error_count 

933 

934 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

935 # entire parse has been rolled back and we're completely hosed. The token is 

936 # discarded and we just keep going. 

937 

938 if len(statestack) <= 1 and lookahead.type != '$end': 

939 lookahead = None 

940 errtoken = None 

941 state = 0 

942 # Nuke the pushback stack 

943 del lookaheadstack[:] 

944 continue 

945 

946 # case 2: the statestack has a couple of entries on it, but we're 

947 # at the end of the file. nuke the top entry and generate an error token 

948 

949 # Start nuking entries on the stack 

950 if lookahead.type == '$end': 

951 # Whoa. We're really hosed here. Bail out 

952 return 

953 

954 if lookahead.type != 'error': 

955 sym = symstack[-1] 

956 if sym.type == 'error': 

957 # Hmmm. Error is on top of stack, we'll just nuke input 

958 # symbol and continue 

959 #--! TRACKING 

960 if tracking: 

961 sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) 

962 sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) 

963 #--! TRACKING 

964 lookahead = None 

965 continue 

966 

967 # Create the error symbol for the first time and make it the new lookahead symbol 

968 t = YaccSymbol() 

969 t.type = 'error' 

970 

971 if hasattr(lookahead, 'lineno'): 

972 t.lineno = t.endlineno = lookahead.lineno 

973 if hasattr(lookahead, 'lexpos'): 

974 t.lexpos = t.endlexpos = lookahead.lexpos 

975 t.value = lookahead 

976 lookaheadstack.append(lookahead) 

977 lookahead = t 

978 else: 

979 sym = symstack.pop() 

980 #--! TRACKING 

981 if tracking: 

982 lookahead.lineno = sym.lineno 

983 lookahead.lexpos = sym.lexpos 

984 #--! TRACKING 

985 statestack.pop() 

986 state = statestack[-1] 

987 

988 continue 

989 

990 # Call an error function here 

991 raise RuntimeError('yacc: internal parser error!!!\n') 

992 

993 #--! parseopt-end 

994 

995 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

996 # parseopt_notrack(). 

997 # 

998 # Optimized version of parseopt() with line number tracking removed. 

999 # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated 

1000 # by the ply/ygen.py script. Make changes to the parsedebug() method instead. 

1001 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1002 

1003 def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): 

1004 #--! parseopt-notrack-start 

1005 lookahead = None # Current lookahead symbol 

1006 lookaheadstack = [] # Stack of lookahead symbols 

1007 actions = self.action # Local reference to action table (to avoid lookup on self.) 

1008 goto = self.goto # Local reference to goto table (to avoid lookup on self.) 

1009 prod = self.productions # Local reference to production list (to avoid lookup on self.) 

1010 defaulted_states = self.defaulted_states # Local reference to defaulted states 

1011 pslice = YaccProduction(None) # Production object passed to grammar rules 

1012 errorcount = 0 # Used during error recovery 

1013 

1014 

1015 # If no lexer was given, we will try to use the lex module 

1016 if not lexer: 

1017 from . import lex 

1018 lexer = lex.lexer 

1019 

1020 # Set up the lexer and parser objects on pslice 

1021 pslice.lexer = lexer 

1022 pslice.parser = self 

1023 

1024 # If input was supplied, pass to lexer 

1025 if input is not None: 

1026 lexer.input(input) 

1027 

1028 if tokenfunc is None: 

1029 # Tokenize function 

1030 get_token = lexer.token 

1031 else: 

1032 get_token = tokenfunc 

1033 

1034 # Set the parser() token method (sometimes used in error recovery) 

1035 self.token = get_token 

1036 

1037 # Set up the state and symbol stacks 

1038 

1039 statestack = [] # Stack of parsing states 

1040 self.statestack = statestack 

1041 symstack = [] # Stack of grammar symbols 

1042 self.symstack = symstack 

1043 

1044 pslice.stack = symstack # Put in the production 

1045 errtoken = None # Err token 

1046 

1047 # The start state is assumed to be (0,$end) 

1048 

1049 statestack.append(0) 

1050 sym = YaccSymbol() 

1051 sym.type = '$end' 

1052 symstack.append(sym) 

1053 state = 0 

1054 while True: 

1055 # Get the next symbol on the input. If a lookahead symbol 

1056 # is already set, we just use that. Otherwise, we'll pull 

1057 # the next token off of the lookaheadstack or from the lexer 

1058 

1059 

1060 if state not in defaulted_states: 

1061 if not lookahead: 

1062 if not lookaheadstack: 

1063 lookahead = get_token() # Get the next token 

1064 else: 

1065 lookahead = lookaheadstack.pop() 

1066 if not lookahead: 

1067 lookahead = YaccSymbol() 

1068 lookahead.type = '$end' 

1069 

1070 # Check the action table 

1071 ltype = lookahead.type 

1072 t = actions[state].get(ltype) 

1073 else: 

1074 t = defaulted_states[state] 

1075 

1076 

1077 if t is not None: 

1078 if t > 0: 

1079 # shift a symbol on the stack 

1080 statestack.append(t) 

1081 state = t 

1082 

1083 

1084 symstack.append(lookahead) 

1085 lookahead = None 

1086 

1087 # Decrease error count on successful shift 

1088 if errorcount: 

1089 errorcount -= 1 

1090 continue 

1091 

1092 if t < 0: 

1093 # reduce a symbol on the stack, emit a production 

1094 p = prod[-t] 

1095 pname = p.name 

1096 plen = p.len 

1097 

1098 # Get production function 

1099 sym = YaccSymbol() 

1100 sym.type = pname # Production name 

1101 sym.value = None 

1102 

1103 

1104 if plen: 

1105 targ = symstack[-plen-1:] 

1106 targ[0] = sym 

1107 

1108 

1109 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1110 # The code enclosed in this section is duplicated 

1111 # below as a performance optimization. Make sure 

1112 # changes get made in both locations. 

1113 

1114 pslice.slice = targ 

1115 

1116 try: 

1117 # Call the grammar rule with our special slice object 

1118 del symstack[-plen:] 

1119 self.state = state 

1120 p.callable(pslice) 

1121 del statestack[-plen:] 

1122 symstack.append(sym) 

1123 state = goto[statestack[-1]][pname] 

1124 statestack.append(state) 

1125 except SyntaxError: 

1126 # If an error was set, enter error recovery state 

1127 lookaheadstack.append(lookahead) # Save the current lookahead token 

1128 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 

1129 statestack.pop() # Pop back one state (before the reduce) 

1130 state = statestack[-1] 

1131 sym.type = 'error' 

1132 sym.value = 'error' 

1133 lookahead = sym 

1134 errorcount = error_count 

1135 self.errorok = False 

1136 

1137 continue 

1138 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1139 

1140 else: 

1141 

1142 

1143 targ = [sym] 

1144 

1145 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1146 # The code enclosed in this section is duplicated 

1147 # above as a performance optimization. Make sure 

1148 # changes get made in both locations. 

1149 

1150 pslice.slice = targ 

1151 

1152 try: 

1153 # Call the grammar rule with our special slice object 

1154 self.state = state 

1155 p.callable(pslice) 

1156 symstack.append(sym) 

1157 state = goto[statestack[-1]][pname] 

1158 statestack.append(state) 

1159 except SyntaxError: 

1160 # If an error was set, enter error recovery state 

1161 lookaheadstack.append(lookahead) # Save the current lookahead token 

1162 statestack.pop() # Pop back one state (before the reduce) 

1163 state = statestack[-1] 

1164 sym.type = 'error' 

1165 sym.value = 'error' 

1166 lookahead = sym 

1167 errorcount = error_count 

1168 self.errorok = False 

1169 

1170 continue 

1171 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 

1172 

1173 if t == 0: 

1174 n = symstack[-1] 

1175 result = getattr(n, 'value', None) 

1176 return result 

1177 

1178 if t is None: 

1179 

1180 

1181 # We have some kind of parsing error here. To handle 

1182 # this, we are going to push the current token onto 

1183 # the tokenstack and replace it with an 'error' token. 

1184 # If there are any synchronization rules, they may 

1185 # catch it. 

1186 # 

1187# In addition to pushing the error token, we call 

1188 # the user defined p_error() function if this is the 

1189 # first syntax error. This function is only called if 

1190 # errorcount == 0. 

1191 if errorcount == 0 or self.errorok: 

1192 errorcount = error_count 

1193 self.errorok = False 

1194 errtoken = lookahead 

1195 if errtoken.type == '$end': 

1196 errtoken = None # End of file! 

1197 if self.errorfunc: 

1198 if errtoken and not hasattr(errtoken, 'lexer'): 

1199 errtoken.lexer = lexer 

1200 self.state = state 

1201 tok = call_errorfunc(self.errorfunc, errtoken, self) 

1202 if self.errorok: 

1203 # User must have done some kind of panic 

1204 # mode recovery on their own. The 

1205 # returned token is the next lookahead 

1206 lookahead = tok 

1207 errtoken = None 

1208 continue 

1209 else: 

1210 if errtoken: 

1211 if hasattr(errtoken, 'lineno'): 

1212 lineno = lookahead.lineno 

1213 else: 

1214 lineno = 0 

1215 if lineno: 

1216 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 

1217 else: 

1218 sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type) 

1219 else: 

1220 sys.stderr.write('yacc: Parse error in input. EOF\n') 

1221 return 

1222 

1223 else: 

1224 errorcount = error_count 

1225 

1226 # case 1: the statestack only has 1 entry on it. If we're in this state, the 

1227 # entire parse has been rolled back and we're completely hosed. The token is 

1228 # discarded and we just keep going. 

1229 

1230 if len(statestack) <= 1 and lookahead.type != '$end': 

1231 lookahead = None 

1232 errtoken = None 

1233 state = 0 

1234 # Nuke the pushback stack 

1235 del lookaheadstack[:] 

1236 continue 

1237 

1238 # case 2: the statestack has a couple of entries on it, but we're 

1239 # at the end of the file. nuke the top entry and generate an error token 

1240 

1241 # Start nuking entries on the stack 

1242 if lookahead.type == '$end': 

1243 # Whoa. We're really hosed here. Bail out 

1244 return 

1245 

1246 if lookahead.type != 'error': 

1247 sym = symstack[-1] 

1248 if sym.type == 'error': 

1249 # Hmmm. Error is on top of stack, we'll just nuke input 

1250 # symbol and continue 

1251 lookahead = None 

1252 continue 

1253 

1254 # Create the error symbol for the first time and make it the new lookahead symbol 

1255 t = YaccSymbol() 

1256 t.type = 'error' 

1257 

1258 if hasattr(lookahead, 'lineno'): 

1259 t.lineno = t.endlineno = lookahead.lineno 

1260 if hasattr(lookahead, 'lexpos'): 

1261 t.lexpos = t.endlexpos = lookahead.lexpos 

1262 t.value = lookahead 

1263 lookaheadstack.append(lookahead) 

1264 lookahead = t 

1265 else: 

1266 sym = symstack.pop() 

1267 statestack.pop() 

1268 state = statestack[-1] 

1269 

1270 continue 

1271 

1272 # Call an error function here 

1273 raise RuntimeError('yacc: internal parser error!!!\n') 

1274 

1275 #--! parseopt-notrack-end 

1276 

1277# ----------------------------------------------------------------------------- 

1278# === Grammar Representation === 

1279# 

1280# The following functions, classes, and variables are used to represent and 

1281# manipulate the rules that make up a grammar. 

1282# ----------------------------------------------------------------------------- 

1283 

1284# regex matching identifiers 

1285_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 

1286 

1287# ----------------------------------------------------------------------------- 

1288# class Production: 

1289# 

1290# This class stores the raw information about a single production or grammar rule. 

1291# A grammar rule refers to a specification such as this: 

1292# 

1293# expr : expr PLUS term 

1294# 

1295# Here are the basic attributes defined on all productions 

1296# 

1297# name - Name of the production. For example 'expr' 

1298# prod - A list of symbols on the right side ['expr','PLUS','term'] 

1299# prec - Production precedence level 

1300# number - Production number. 

1301# func - Function that executes on reduce 

1302# file - File where production function is defined 

1303# lineno - Line number where production function is defined 

1304# 

1305# The following additional attributes are also defined: 

1306# 

1307# len - Length of the production (number of symbols on right hand side) 

1308# usyms - Set of unique symbols found in the production 

1309# ----------------------------------------------------------------------------- 

1310 
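# For example, a rule whose docstring reads 'expr : expr PLUS term' produces
# a Production with name 'expr', prod ('expr', 'PLUS', 'term'), len 3,
# usyms ['expr', 'PLUS', 'term'], and str 'expr -> expr PLUS term'.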

1311class Production(object): 

1312 reduced = 0 

1313 def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): 

1314 self.name = name 

1315 self.prod = tuple(prod) 

1316 self.number = number 

1317 self.func = func 

1318 self.callable = None 

1319 self.file = file 

1320 self.line = line 

1321 self.prec = precedence 

1322 

1323 # Internal settings used during table construction 

1324 

1325 self.len = len(self.prod) # Length of the production 

1326 

1327 # Create a list of unique production symbols used in the production 

1328 self.usyms = [] 

1329 for s in self.prod: 

1330 if s not in self.usyms: 

1331 self.usyms.append(s) 

1332 

1333 # List of all LR items for the production 

1334 self.lr_items = [] 

1335 self.lr_next = None 

1336 

1337 # Create a string representation 

1338 if self.prod: 

1339 self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) 

1340 else: 

1341 self.str = '%s -> <empty>' % self.name 

1342 

1343 def __str__(self): 

1344 return self.str 

1345 

1346 def __repr__(self): 

1347 return 'Production(' + str(self) + ')' 

1348 

1349 def __len__(self): 

1350 return len(self.prod) 

1351 

1352 def __nonzero__(self): 

1353 return 1 

1354 

1355 def __getitem__(self, index): 

1356 return self.prod[index] 

1357 

1358 # Return the nth lr_item from the production (or None if at the end) 

1359 def lr_item(self, n): 

1360 if n > len(self.prod): 

1361 return None 

1362 p = LRItem(self, n) 

1363 # Precompute the list of productions immediately following. 

1364 try: 

1365 p.lr_after = self.Prodnames[p.prod[n+1]] 

1366 except (IndexError, KeyError): 

1367 p.lr_after = [] 

1368 try: 

1369 p.lr_before = p.prod[n-1] 

1370 except IndexError: 

1371 p.lr_before = None 

1372 return p 

1373 

1374 # Bind the production function name to a callable 

1375 def bind(self, pdict): 

1376 if self.func: 

1377 self.callable = pdict[self.func] 

1378 

1379# This class serves as a minimal stand-in for Production objects when 

1380# reading table data from files. It only contains information 

1381# actually used by the LR parsing engine, plus some additional 

1382# debugging information. 

1383class MiniProduction(object): 

1384 def __init__(self, str, name, len, func, file, line): 

1385 self.name = name 

1386 self.len = len 

1387 self.func = func 

1388 self.callable = None 

1389 self.file = file 

1390 self.line = line 

1391 self.str = str 

1392 

1393 def __str__(self): 

1394 return self.str 

1395 

1396 def __repr__(self): 

1397 return 'MiniProduction(%s)' % self.str 

1398 

1399 # Bind the production function name to a callable 

1400 def bind(self, pdict): 

1401 if self.func: 

1402 self.callable = pdict[self.func] 

1403 

1404 

1405# ----------------------------------------------------------------------------- 

1406# class LRItem 

1407# 

1408# This class represents a specific stage of parsing a production rule. For 

1409# example: 

1410# 

1411# expr : expr . PLUS term 

1412# 

1413# In the above, the "." represents the current location of the parse. Here 

1414# are the basic attributes: 

1415# 

1416# name - Name of the production. For example 'expr' 

1417# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 

1418# number - Production number. 

1419# 

1420# lr_next - Next LR item. For example, if we are at 'expr -> expr . PLUS term' 

1421# then lr_next refers to 'expr -> expr PLUS . term' 

1422# lr_index - LR item index (location of the ".") in the prod list. 

1423# lookaheads - LALR lookahead symbols for this item 

1424# len - Length of the production (number of symbols on right hand side) 

1425# lr_after - List of all productions that immediately follow 

1426# lr_before - Grammar symbol immediately before 

1427# ----------------------------------------------------------------------------- 

1428 

1429class LRItem(object): 

1430 def __init__(self, p, n): 

1431 self.name = p.name 

1432 self.prod = list(p.prod) 

1433 self.number = p.number 

1434 self.lr_index = n 

1435 self.lookaheads = {} 

1436 self.prod.insert(n, '.') 

1437 self.prod = tuple(self.prod) 

1438 self.len = len(self.prod) 

1439 self.usyms = p.usyms 

1440 

1441 def __str__(self): 

1442 if self.prod: 

1443 s = '%s -> %s' % (self.name, ' '.join(self.prod)) 

1444 else: 

1445 s = '%s -> <empty>' % self.name 

1446 return s 

1447 

1448 def __repr__(self): 

1449 return 'LRItem(' + str(self) + ')' 

1450 

1451# ----------------------------------------------------------------------------- 

1452# rightmost_terminal() 

1453# 

1454# Return the rightmost terminal from a list of symbols. Used in add_production() 

1455# ----------------------------------------------------------------------------- 

1456def rightmost_terminal(symbols, terminals): 

1457 i = len(symbols) - 1 

1458 while i >= 0: 

1459 if symbols[i] in terminals: 

1460 return symbols[i] 

1461 i -= 1 

1462 return None 

1463 
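# For example, assuming 'PLUS' is a terminal while 'expr' and 'term' are not:
#
#     rightmost_terminal(['expr', 'PLUS', 'term'], terminals)   # -> 'PLUS'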

1464# ----------------------------------------------------------------------------- 

1465# === GRAMMAR CLASS === 

1466# 

1467# The following class represents the contents of the specified grammar along 

1468# with various computed properties such as first sets, follow sets, LR items, etc. 

1469# This data is used for critical parts of the table generation process later. 

1470# ----------------------------------------------------------------------------- 

1471 

1472class GrammarError(YaccError): 

1473 pass 

1474 

1475class Grammar(object): 

1476 def __init__(self, terminals): 

1477 self.Productions = [None] # A list of all of the productions. The first 

1478 # entry is always reserved for the purpose of 

1479 # building an augmented grammar 

1480 

1481 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 

1482 # productions of that nonterminal. 

1483 

1484 self.Prodmap = {} # A dictionary that is only used to detect duplicate 

1485 # productions. 

1486 

1487 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 

1488 # list of the rules where they are used. 

1489 

1490 for term in terminals: 

1491 self.Terminals[term] = [] 

1492 

1493 self.Terminals['error'] = [] 

1494 

1495 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 

1496 # of rule numbers where they are used. 

1497 

1498 self.First = {} # A dictionary of precomputed FIRST(x) symbols 

1499 

1500 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 

1501 

1502 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 

1503 # form ('right',level) or ('nonassoc', level) or ('left',level) 

1504 

1505 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar. 

1506 # This is only used to provide error checking and to generate 

1507 # a warning about unused precedence rules. 

1508 

1509 self.Start = None # Starting symbol for the grammar 

1510 

1511 

1512 def __len__(self): 

1513 return len(self.Productions) 

1514 

1515 def __getitem__(self, index): 

1516 return self.Productions[index] 

1517 

1518 # ----------------------------------------------------------------------------- 

1519 # set_precedence() 

1520 # 

1521 # Sets the precedence for a given terminal. assoc is the associativity such as 

1522 # 'left','right', or 'nonassoc'. level is a numeric level. 

1523 # 

1524 # ----------------------------------------------------------------------------- 

1525 

1526 def set_precedence(self, term, assoc, level): 

1527 assert self.Productions == [None], 'Must call set_precedence() before add_production()' 

1528 if term in self.Precedence: 

1529 raise GrammarError('Precedence already specified for terminal %r' % term) 

1530 if assoc not in ['left', 'right', 'nonassoc']: 

1531 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 

1532 self.Precedence[term] = (assoc, level) 

1533 
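    # For example, the conventional precedence table
    #
    #     precedence = (('left', 'PLUS', 'MINUS'),
    #                   ('left', 'TIMES', 'DIVIDE'))
    #
    # is translated by the yacc() function into calls such as
    # set_precedence('PLUS', 'left', 1) and set_precedence('TIMES', 'left', 2),
    # where a higher level binds more tightly.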

1534 # ----------------------------------------------------------------------------- 

1535 # add_production() 

1536 # 

1537 # Given an action function, this function assembles a production rule and 

1538 # computes its precedence level. 

1539 # 

1540 # The production rule is supplied as a list of symbols. For example, 

1541 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 

1542 # symbols ['expr','PLUS','term']. 

1543 # 

1544 # Precedence is determined by the precedence of the right-most non-terminal 

1545 # or the precedence of a terminal specified by %prec. 

1546 # 

1547 # A variety of error checks are performed to make sure production symbols 

1548 # are valid and that %prec is used correctly. 

1549 # ----------------------------------------------------------------------------- 
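    # For example, a unary-minus rule can borrow the precedence of a
    # (hypothetical) UMINUS entry from the precedence table:
    #
    #     def p_expr_uminus(p):
    #         'expr : MINUS expr %prec UMINUS'
    #         p[0] = -p[2]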

1550 

1551 def add_production(self, prodname, syms, func=None, file='', line=0): 

1552 

1553 if prodname in self.Terminals: 

1554 raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) 

1555 if prodname == 'error': 

1556 raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) 

1557 if not _is_identifier.match(prodname): 

1558 raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) 

1559 

1560 # Look for literal tokens 

1561 for n, s in enumerate(syms): 

1562 if s[0] in "'\"": 

1563 try: 

1564 c = eval(s) 

1565 if (len(c) > 1): 

1566 raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % 

1567 (file, line, s, prodname)) 

1568 if c not in self.Terminals: 

1569 self.Terminals[c] = [] 

1570 syms[n] = c 

1571 continue 

1572 except SyntaxError: 

1573 pass 

1574 if not _is_identifier.match(s) and s != '%prec': 

1575 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) 

1576 

1577 # Determine the precedence level 

1578 if '%prec' in syms: 

1579 if syms[-1] == '%prec': 

1580 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) 

1581 if syms[-2] != '%prec': 

1582 raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % 

1583 (file, line)) 

1584 precname = syms[-1] 

1585 prodprec = self.Precedence.get(precname) 

1586 if not prodprec: 

1587 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) 

1588 else: 

1589 self.UsedPrecedence.add(precname) 

1590 del syms[-2:] # Drop %prec from the rule 

1591 else: 

1592 # If no %prec, precedence is determined by the rightmost terminal symbol 

1593 precname = rightmost_terminal(syms, self.Terminals) 

1594 prodprec = self.Precedence.get(precname, ('right', 0)) 

1595 

1596 # See if the rule is already in the rulemap 

1597 map = '%s -> %s' % (prodname, syms) 

1598 if map in self.Prodmap: 

1599 m = self.Prodmap[map] 

1600 raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + 

1601 'Previous definition at %s:%d' % (m.file, m.line)) 

1602 

1603 # From this point on, everything is valid. Create a new Production instance 

1604 pnumber = len(self.Productions) 

1605 if prodname not in self.Nonterminals: 

1606 self.Nonterminals[prodname] = [] 

1607 

1608 # Add the production number to Terminals and Nonterminals 

1609 for t in syms: 

1610 if t in self.Terminals: 

1611 self.Terminals[t].append(pnumber) 

1612 else: 

1613 if t not in self.Nonterminals: 

1614 self.Nonterminals[t] = [] 

1615 self.Nonterminals[t].append(pnumber) 

1616 

1617 # Create a production and add it to the list of productions 

1618 p = Production(pnumber, prodname, syms, prodprec, func, file, line) 

1619 self.Productions.append(p) 

1620 self.Prodmap[map] = p 

1621 

1622 # Add to the global productions list 

1623 try: 

1624 self.Prodnames[prodname].append(p) 

1625 except KeyError: 

1626 self.Prodnames[prodname] = [p] 

1627 

1628 # ----------------------------------------------------------------------------- 

1629 # set_start() 

1630 # 

1631 # Sets the starting symbol and creates the augmented grammar. Production 

1632 # rule 0 is S' -> start where start is the start symbol. 

1633 # ----------------------------------------------------------------------------- 

1634 

1635 def set_start(self, start=None): 

1636 if not start: 

1637 start = self.Productions[1].name 

1638 if start not in self.Nonterminals: 

1639 raise GrammarError('start symbol %s undefined' % start) 

1640 self.Productions[0] = Production(0, "S'", [start]) 

1641 self.Nonterminals[start].append(0) 

1642 self.Start = start 

1643 

1644 # ----------------------------------------------------------------------------- 

1645 # find_unreachable() 

1646 # 

1647 # Find all of the nonterminal symbols that can't be reached from the starting 

1648 # symbol. Returns a list of nonterminals that can't be reached. 

1649 # ----------------------------------------------------------------------------- 
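# A minimal sketch of what gets reported (hypothetical rules): given
#
#     start = 'stmt'
#     stmt   : NAME EQUALS expr
#     expr   : NUMBER
#     orphan : NAME            # never referenced from 'stmt'
#
# find_unreachable() would return ['orphan'].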

1650 

1651 def find_unreachable(self): 

1652 

1653 # Mark all symbols that are reachable from a symbol s 

1654 def mark_reachable_from(s): 

1655 if s in reachable: 

1656 return 

1657 reachable.add(s) 

1658 for p in self.Prodnames.get(s, []): 

1659 for r in p.prod: 

1660 mark_reachable_from(r) 

1661 

1662 reachable = set() 

1663 mark_reachable_from(self.Productions[0].prod[0]) 

1664 return [s for s in self.Nonterminals if s not in reachable] 

1665 

1666 # ----------------------------------------------------------------------------- 

1667 # infinite_cycles() 

1668 # 

1669 # This function looks at the various parsing rules and tries to detect 

1670 # infinite recursion cycles (grammar rules where there is no possible way 

1671 # to derive a string of only terminals). 

1672 # ----------------------------------------------------------------------------- 
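# For example (hypothetical rule): if the only production for 'a' is
#
#     a : a PLUS a
#
# then 'a' can never derive a string consisting solely of terminals, and
# this method reports it.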

1673 

1674 def infinite_cycles(self): 

1675 terminates = {} 

1676 

1677 # Terminals: 

1678 for t in self.Terminals: 

1679 terminates[t] = True 

1680 

1681 terminates['$end'] = True 

1682 

1683 # Nonterminals: 

1684 

1685 # Initialize to false: 

1686 for n in self.Nonterminals: 

1687 terminates[n] = False 

1688 

1689 # Then propagate termination until no change: 

1690 while True: 

1691 some_change = False 

1692 for (n, pl) in self.Prodnames.items(): 

1693 # Nonterminal n terminates iff any of its productions terminates. 

1694 for p in pl: 

1695 # Production p terminates iff all of its rhs symbols terminate. 

1696 for s in p.prod: 

1697 if not terminates[s]: 

1698 # The symbol s does not terminate, 

1699 # so production p does not terminate. 

1700 p_terminates = False 

1701 break 

1702 else: 

1703 # didn't break from the loop, 

1704 # so every symbol s terminates 

1705 # so production p terminates. 

1706 p_terminates = True 

1707 

1708 if p_terminates: 

1709 # symbol n terminates! 

1710 if not terminates[n]: 

1711 terminates[n] = True 

1712 some_change = True 

1713 # Don't need to consider any more productions for this n. 

1714 break 

1715 

1716 if not some_change: 

1717 break 

1718 

1719 infinite = [] 

1720 for (s, term) in terminates.items(): 

1721 if not term: 

1722 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 

1723 # s is used-but-not-defined, and we've already warned of that, 

1724 # so it would be overkill to say that it's also non-terminating. 

1725 pass 

1726 else: 

1727 infinite.append(s) 

1728 

1729 return infinite 

1730 

1731 # ----------------------------------------------------------------------------- 

1732 # undefined_symbols() 

1733 # 

1734 # Find all symbols that were used in the grammar, but not defined as tokens or 

1735 # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol 

1736 # and prod is the production where the symbol was used. 

1737 # ----------------------------------------------------------------------------- 

1738 def undefined_symbols(self): 

1739 result = [] 

1740 for p in self.Productions: 

1741 if not p: 

1742 continue 

1743 

1744 for s in p.prod: 

1745 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 

1746 result.append((s, p)) 

1747 return result 

1748 

1749 # ----------------------------------------------------------------------------- 

1750 # unused_terminals() 

1751 # 

1752 # Find all terminals that were defined, but not used by the grammar. Returns 

1753 # a list of the unused terminal symbols. 

1754 # ----------------------------------------------------------------------------- 

1755 def unused_terminals(self): 

1756 unused_tok = [] 

1757 for s, v in self.Terminals.items(): 

1758 if s != 'error' and not v: 

1759 unused_tok.append(s) 

1760 

1761 return unused_tok 

1762 

1763 # ------------------------------------------------------------------------------ 

1764 # unused_rules() 

1765 # 

1766 # Find all grammar rules that were defined, but not used (possibly unreachable). 

1767 # Returns a list of productions. 

1768 # ------------------------------------------------------------------------------ 

1769 

1770 def unused_rules(self): 

1771 unused_prod = [] 

1772 for s, v in self.Nonterminals.items(): 

1773 if not v: 

1774 p = self.Prodnames[s][0] 

1775 unused_prod.append(p) 

1776 return unused_prod 

1777 

1778 # ----------------------------------------------------------------------------- 

1779 # unused_precedence() 

1780 # 

1781 # Returns a list of tuples (term,precedence) corresponding to precedence 

1782 # rules that were never used by the grammar. term is the name of the terminal 

1783 # on which precedence was applied and precedence is a string such as 'left' or 

1784 # 'right' corresponding to the type of precedence. 

1785 # ----------------------------------------------------------------------------- 

1786 

1787 def unused_precedence(self): 

1788 unused = [] 

1789 for termname in self.Precedence: 

1790 if not (termname in self.Terminals or termname in self.UsedPrecedence): 

1791 unused.append((termname, self.Precedence[termname][0])) 

1792 

1793 return unused 

1794 

1795 # ------------------------------------------------------------------------- 

1796 # _first() 

1797 # 

1798 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 

1799 # 

1800 # During execution of compute_first(), the result may be incomplete. 

1801 # Afterward (e.g., when called from compute_follow()), it will be complete. 

1802 # ------------------------------------------------------------------------- 
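# A worked example (hypothetical grammar): with
#
#     term : NUM
#     opt  : PLUS term opt
#          | <empty>
#
# First['opt'] is ['PLUS', '<empty>'], so _first(('opt', 'NUM')) returns
# ['PLUS', 'NUM']: 'opt' can vanish, so NUM from the next symbol is added,
# but '<empty>' itself is dropped because NUM cannot vanish.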

1803 def _first(self, beta): 

1804 

1805 # We are computing First(x1,x2,x3,...,xn) 

1806 result = [] 

1807 for x in beta: 

1808 x_produces_empty = False 

1809 

1810 # Add all the non-<empty> symbols of First[x] to the result. 

1811 for f in self.First[x]: 

1812 if f == '<empty>': 

1813 x_produces_empty = True 

1814 else: 

1815 if f not in result: 

1816 result.append(f) 

1817 

1818 if x_produces_empty: 

1819 # We have to consider the next x in beta, 

1820 # i.e. stay in the loop. 

1821 pass 

1822 else: 

1823 # We don't have to consider any further symbols in beta. 

1824 break 

1825 else: 

1826 # There was no 'break' from the loop, 

1827 # so x_produces_empty was true for all x in beta, 

1828 # so beta produces empty as well. 

1829 result.append('<empty>') 

1830 

1831 return result 

1832 

1833 # ------------------------------------------------------------------------- 

1834 # compute_first() 

1835 # 

1836 # Compute the value of FIRST1(X) for all symbols 

1837 # ------------------------------------------------------------------------- 

1838 def compute_first(self): 

1839 if self.First: 

1840 return self.First 

1841 

1842 # Terminals: 

1843 for t in self.Terminals: 

1844 self.First[t] = [t] 

1845 

1846 self.First['$end'] = ['$end'] 

1847 

1848 # Nonterminals: 

1849 

1850 # Initialize to the empty set: 

1851 for n in self.Nonterminals: 

1852 self.First[n] = [] 

1853 

1854 # Then propagate symbols until no change: 

1855 while True: 

1856 some_change = False 

1857 for n in self.Nonterminals: 

1858 for p in self.Prodnames[n]: 

1859 for f in self._first(p.prod): 

1860 if f not in self.First[n]: 

1861 self.First[n].append(f) 

1862 some_change = True 

1863 if not some_change: 

1864 break 

1865 

1866 return self.First 

1867 

1868 # --------------------------------------------------------------------- 

1869 # compute_follow() 

1870 # 

1871 # Computes all of the follow sets for every non-terminal symbol. The 

1872 # follow set is the set of all symbols that might follow a given 

1873 # non-terminal. See the Dragon book, 2nd Ed. p. 189. 

1874 # --------------------------------------------------------------------- 
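# A worked example (extending the hypothetical grammar above): with
#
#     expr : term opt
#     opt  : PLUS term opt
#          | <empty>
#     term : NUM
#
# Follow['expr'] = ['$end']; Follow['opt'] = ['$end'] since 'opt' ends both
# rules that mention it; Follow['term'] = ['PLUS', '$end'], taking PLUS from
# First['opt'] and '$end' because 'opt' may derive empty.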

1875 def compute_follow(self, start=None): 

1876 # If already computed, return the result 

1877 if self.Follow: 

1878 return self.Follow 

1879 

1880 # If first sets not computed yet, do that first. 

1881 if not self.First: 

1882 self.compute_first() 

1883 

1884 # Add '$end' to the follow list of the start symbol 

1885 for k in self.Nonterminals: 

1886 self.Follow[k] = [] 

1887 

1888 if not start: 

1889 start = self.Productions[1].name 

1890 

1891 self.Follow[start] = ['$end'] 

1892 

1893 while True: 

1894 didadd = False 

1895 for p in self.Productions[1:]: 

1896 # Here is the production set 

1897 for i, B in enumerate(p.prod): 

1898 if B in self.Nonterminals: 

1899 # Okay. We got a non-terminal in a production 

1900 fst = self._first(p.prod[i+1:]) 

1901 hasempty = False 

1902 for f in fst: 

1903 if f != '<empty>' and f not in self.Follow[B]: 

1904 self.Follow[B].append(f) 

1905 didadd = True 

1906 if f == '<empty>': 

1907 hasempty = True 

1908 if hasempty or i == (len(p.prod)-1): 

1909 # Add elements of follow(a) to follow(b) 

1910 for f in self.Follow[p.name]: 

1911 if f not in self.Follow[B]: 

1912 self.Follow[B].append(f) 

1913 didadd = True 

1914 if not didadd: 

1915 break 

1916 return self.Follow 

1917 

1918 

1919 # ----------------------------------------------------------------------------- 

1920 # build_lritems() 

1921 # 

1922 # This function walks the list of productions and builds a complete set of the 

1923 # LR items. The LR items are stored in two ways: First, they are uniquely 

1924 # numbered and placed in the list _lritems. Second, a linked list of LR items 

1925 # is built for each production. For example: 

1926 # 

1927 # E -> E PLUS E 

1928 # 

1929 # Creates the list 

1930 # 

1931 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 

1932 # ----------------------------------------------------------------------------- 

1933 

1934 def build_lritems(self): 

1935 for p in self.Productions: 

1936 lastlri = p 

1937 i = 0 

1938 lr_items = [] 

1939 while True: 

1940 if i > len(p): 

1941 lri = None 

1942 else: 

1943 lri = LRItem(p, i) 

1944 # Precompute the list of productions immediately following 

1945 try: 

1946 lri.lr_after = self.Prodnames[lri.prod[i+1]] 

1947 except (IndexError, KeyError): 

1948 lri.lr_after = [] 

1949 try: 

1950 lri.lr_before = lri.prod[i-1] 

1951 except IndexError: 

1952 lri.lr_before = None 

1953 

1954 lastlri.lr_next = lri 

1955 if not lri: 

1956 break 

1957 lr_items.append(lri) 

1958 lastlri = lri 

1959 i += 1 

1960 p.lr_items = lr_items 

1961 

1962# ----------------------------------------------------------------------------- 

1963# == Class LRTable == 

1964# 

1965 # This class represents a basic table of LR parsing information. 

1966# Methods for generating the tables are not defined here. They are defined 

1967# in the derived class LRGeneratedTable. 

1968# ----------------------------------------------------------------------------- 

1969 

1970class VersionError(YaccError): 

1971 pass 

1972 

1973class LRTable(object): 

1974 def __init__(self): 

1975 self.lr_action = None 

1976 self.lr_goto = None 

1977 self.lr_productions = None 

1978 self.lr_method = None 

1979 

1980 def read_table(self, module): 

1981 if isinstance(module, types.ModuleType): 

1982 parsetab = module 

1983 else: 

1984 exec('import %s' % module) 

1985 parsetab = sys.modules[module] 

1986 

1987 if parsetab._tabversion != __tabversion__: 

1988 raise VersionError('yacc table file version is out of date') 

1989 

1990 self.lr_action = parsetab._lr_action 

1991 self.lr_goto = parsetab._lr_goto 

1992 

1993 self.lr_productions = [] 

1994 for p in parsetab._lr_productions: 

1995 self.lr_productions.append(MiniProduction(*p)) 

1996 

1997 self.lr_method = parsetab._lr_method 

1998 return parsetab._lr_signature 

1999 

2000 def read_pickle(self, filename): 

2001 try: 

2002 import cPickle as pickle 

2003 except ImportError: 

2004 import pickle 

2005 

2006 if not os.path.exists(filename): 

2007 raise ImportError 

2008 

2009 in_f = open(filename, 'rb') 

2010 

2011 tabversion = pickle.load(in_f) 

2012 if tabversion != __tabversion__: 

2013 raise VersionError('yacc table file version is out of date') 

2014 self.lr_method = pickle.load(in_f) 

2015 signature = pickle.load(in_f) 

2016 self.lr_action = pickle.load(in_f) 

2017 self.lr_goto = pickle.load(in_f) 

2018 productions = pickle.load(in_f) 

2019 

2020 self.lr_productions = [] 

2021 for p in productions: 

2022 self.lr_productions.append(MiniProduction(*p)) 

2023 

2024 in_f.close() 

2025 return signature 

2026 

2027 # Bind all production function names to callable objects in pdict 

2028 def bind_callables(self, pdict): 

2029 for p in self.lr_productions: 

2030 p.bind(pdict) 

2031 

2032 

2033# ----------------------------------------------------------------------------- 

2034# === LR Generator === 

2035# 

2036# The following classes and functions are used to generate LR parsing tables on 

2037# a grammar. 

2038# ----------------------------------------------------------------------------- 

2039 

2040# ----------------------------------------------------------------------------- 

2041# digraph() 

2042# traverse() 

2043# 

2044 # The following two functions are used to compute set-valued functions 

2045# of the form: 

2046# 

2047# F(x) = F'(x) U U{F(y) | x R y} 

2048# 

2049# This is used to compute the values of Read() sets as well as FOLLOW sets 

2050# in LALR(1) generation. 

2051# 

2052# Inputs: X - An input set 

2053# R - A relation 

2054# FP - Set-valued function 

2055# ------------------------------------------------------------------------------ 

2056 

2057def digraph(X, R, FP): 

2058 N = {} 

2059 for x in X: 

2060 N[x] = 0 

2061 stack = [] 

2062 F = {} 

2063 for x in X: 

2064 if N[x] == 0: 

2065 traverse(x, N, stack, F, X, R, FP) 

2066 return F 

2067 

2068def traverse(x, N, stack, F, X, R, FP): 

2069 stack.append(x) 

2070 d = len(stack) 

2071 N[x] = d 

2072 F[x] = FP(x) # F(x) <- F'(x) 

2073 

2074 rel = R(x) # Get y's related to x 

2075 for y in rel: 

2076 if N[y] == 0: 

2077 traverse(y, N, stack, F, X, R, FP) 

2078 N[x] = min(N[x], N[y]) 

2079 for a in F.get(y, []): 

2080 if a not in F[x]: 

2081 F[x].append(a) 

2082 if N[x] == d: 

2083 N[stack[-1]] = MAXINT 

2084 F[stack[-1]] = F[x] 

2085 element = stack.pop() 

2086 while element != x: 

2087 N[stack[-1]] = MAXINT 

2088 F[stack[-1]] = F[x] 

2089 element = stack.pop() 

2090 

2091class LALRError(YaccError): 

2092 pass 

2093 

2094# ----------------------------------------------------------------------------- 

2095# == LRGeneratedTable == 

2096# 

2097# This class implements the LR table generation algorithm. There are no 

2098# public methods except for write() 

2099# ----------------------------------------------------------------------------- 

2100 

2101class LRGeneratedTable(LRTable): 

2102 def __init__(self, grammar, method='LALR', log=None): 

2103 if method not in ['SLR', 'LALR']: 

2104 raise LALRError('Unsupported method %s' % method) 

2105 

2106 self.grammar = grammar 

2107 self.lr_method = method 

2108 

2109 # Set up the logger 

2110 if not log: 

2111 log = NullLogger() 

2112 self.log = log 

2113 

2114 # Internal attributes 

2115 self.lr_action = {} # Action table 

2116 self.lr_goto = {} # Goto table 

2117 self.lr_productions = grammar.Productions # Copy of grammar Production array 

2118 self.lr_goto_cache = {} # Cache of computed gotos 

2119 self.lr0_cidhash = {} # Cache of closures 

2120 

2121 self._add_count = 0 # Internal counter used to detect cycles 

2122 

2123 # Diagnostic information filled in by the table generator 

2124 self.sr_conflict = 0 

2125 self.rr_conflict = 0 

2126 self.conflicts = [] # List of conflicts 

2127 

2128 self.sr_conflicts = [] 

2129 self.rr_conflicts = [] 

2130 

2131 # Build the tables 

2132 self.grammar.build_lritems() 

2133 self.grammar.compute_first() 

2134 self.grammar.compute_follow() 

2135 self.lr_parse_table() 

2136 

2137 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 

2138 

2139 def lr0_closure(self, I): 

2140 self._add_count += 1 

2141 

2142 # Add everything in I to J 

2143 J = I[:] 

2144 didadd = True 

2145 while didadd: 

2146 didadd = False 

2147 for j in J: 

2148 for x in j.lr_after: 

2149 if getattr(x, 'lr0_added', 0) == self._add_count: 

2150 continue 

2151 # Add B --> .G to J 

2152 J.append(x.lr_next) 

2153 x.lr0_added = self._add_count 

2154 didadd = True 

2155 

2156 return J 

2157 

2158 # Compute the LR(0) goto function goto(I,X) where I is a set 

2159 # of LR(0) items and X is a grammar symbol. This function is written 

2160 # in a way that guarantees uniqueness of the generated goto sets 

2161 # (i.e. the same goto set will never be returned as two different Python 

2162 # objects). With uniqueness, we can later do fast set comparisons using 

2163 # id(obj) instead of element-wise comparison. 

2164 

2165 def lr0_goto(self, I, x): 

2166 # First we look for a previously cached entry 

2167 g = self.lr_goto_cache.get((id(I), x)) 

2168 if g: 

2169 return g 

2170 

2171 # Now we generate the goto set in a way that guarantees uniqueness 

2172 # of the result 

2173 

2174 s = self.lr_goto_cache.get(x) 

2175 if not s: 

2176 s = {} 

2177 self.lr_goto_cache[x] = s 

2178 

2179 gs = [] 

2180 for p in I: 

2181 n = p.lr_next 

2182 if n and n.lr_before == x: 

2183 s1 = s.get(id(n)) 

2184 if not s1: 

2185 s1 = {} 

2186 s[id(n)] = s1 

2187 gs.append(n) 

2188 s = s1 

2189 g = s.get('$end') 

2190 if not g: 

2191 if gs: 

2192 g = self.lr0_closure(gs) 

2193 s['$end'] = g 

2194 else: 

2195 s['$end'] = gs 

2196 self.lr_goto_cache[(id(I), x)] = g 

2197 return g 

2198 

2199 # Compute the LR(0) sets of item function 

2200 def lr0_items(self): 

2201 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] 

2202 i = 0 

2203 for I in C: 

2204 self.lr0_cidhash[id(I)] = i 

2205 i += 1 

2206 

2207 # Loop over the items in C and each grammar symbol 

2208 i = 0 

2209 while i < len(C): 

2210 I = C[i] 

2211 i += 1 

2212 

2213 # Collect all of the symbols that could possibly be in the goto(I,X) sets 

2214 asyms = {} 

2215 for ii in I: 

2216 for s in ii.usyms: 

2217 asyms[s] = None 

2218 

2219 for x in asyms: 

2220 g = self.lr0_goto(I, x) 

2221 if not g or id(g) in self.lr0_cidhash: 

2222 continue 

2223 self.lr0_cidhash[id(g)] = len(C) 

2224 C.append(g) 

2225 

2226 return C 

2227 

2228 # ----------------------------------------------------------------------------- 

2229 # ==== LALR(1) Parsing ==== 

2230 # 

2231 # LALR(1) parsing is almost exactly the same as SLR except that instead of 

2232 # relying upon Follow() sets when performing reductions, a more selective 

2233 # lookahead set that incorporates the state of the LR(0) machine is utilized. 

2234 # Thus, we mainly just have to focus on calculating the lookahead sets. 

2235 # 

2236 # The method used here is due to DeRemer and Pennello (1982). 

2237 # 

2238 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1) 

2239 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 

2240 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 

2241 # 

2242 # Further details can also be found in: 

2243 # 

2244 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 

2245 # McGraw-Hill Book Company, (1985). 

2246 # 

2247 # ----------------------------------------------------------------------------- 

2248 

2249 # ----------------------------------------------------------------------------- 

2250 # compute_nullable_nonterminals() 

2251 # 

2252 # Creates a dictionary containing all of the non-terminals that might produce 

2253 # an empty production. 

2254 # ----------------------------------------------------------------------------- 
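# For example (hypothetical rules): given
#
#     empty     :
#     opt_comma : COMMA
#               | empty
#
# the result is {'empty', 'opt_comma'}: 'empty' directly from its
# zero-length production, and 'opt_comma' because one of its productions
# consists entirely of nullable symbols.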

2255 

2256 def compute_nullable_nonterminals(self): 

2257 nullable = set() 

2258 num_nullable = 0 

2259 while True: 

2260 for p in self.grammar.Productions[1:]: 

2261 if p.len == 0: 

2262 nullable.add(p.name) 

2263 continue 

2264 for t in p.prod: 

2265 if t not in nullable: 

2266 break 

2267 else: 

2268 nullable.add(p.name) 

2269 if len(nullable) == num_nullable: 

2270 break 

2271 num_nullable = len(nullable) 

2272 return nullable 

2273 

2274 # ----------------------------------------------------------------------------- 

2275 # find_nonterminal_trans(C) 

2276 # 

2277 # Given a set of LR(0) items, this function finds all of the non-terminal 

2278 # transitions. These are transitions in which a dot appears immediately before 

2279 # a non-terminal. Returns a list of tuples of the form (state,N) where state 

2280 # is the state number and N is the nonterminal symbol. 

2281 # 

2282 # The input C is the set of LR(0) items. 

2283 # ----------------------------------------------------------------------------- 

2284 

2285 def find_nonterminal_transitions(self, C): 

2286 trans = [] 

2287 for stateno, state in enumerate(C): 

2288 for p in state: 

2289 if p.lr_index < p.len - 1: 

2290 t = (stateno, p.prod[p.lr_index+1]) 

2291 if t[1] in self.grammar.Nonterminals: 

2292 if t not in trans: 

2293 trans.append(t) 

2294 return trans 

2295 

2296 # ----------------------------------------------------------------------------- 

2297 # dr_relation() 

2298 # 

2299 # Computes the DR(p,A) relationships for non-terminal transitions. The input 

2300 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 

2301 # 

2302 # Returns a list of terminals. 

2303 # ----------------------------------------------------------------------------- 

2304 

2305 def dr_relation(self, C, trans, nullable): 

2306 state, N = trans 

2307 terms = [] 

2308 

2309 g = self.lr0_goto(C[state], N) 

2310 for p in g: 

2311 if p.lr_index < p.len - 1: 

2312 a = p.prod[p.lr_index+1] 

2313 if a in self.grammar.Terminals: 

2314 if a not in terms: 

2315 terms.append(a) 

2316 

2317 # This extra bit is to handle the start state 

2318 if state == 0 and N == self.grammar.Productions[0].prod[0]: 

2319 terms.append('$end') 

2320 

2321 return terms 

2322 

2323 # ----------------------------------------------------------------------------- 

2324 # reads_relation() 

2325 # 

2326 # Computes the READS() relation (p,A) READS (t,C). 

2327 # ----------------------------------------------------------------------------- 

2328 

2329 def reads_relation(self, C, trans, empty): 

2330 # Look for empty transitions 

2331 rel = [] 

2332 state, N = trans 

2333 

2334 g = self.lr0_goto(C[state], N) 

2335 j = self.lr0_cidhash.get(id(g), -1) 

2336 for p in g: 

2337 if p.lr_index < p.len - 1: 

2338 a = p.prod[p.lr_index + 1] 

2339 if a in empty: 

2340 rel.append((j, a)) 

2341 

2342 return rel 

2343 

2344 # ----------------------------------------------------------------------------- 

2345 # compute_lookback_includes() 

2346 # 

2347 # Determines the lookback and includes relations 

2348 # 

2349 # LOOKBACK: 

2350 # 

2351 # This relation is determined by running the LR(0) state machine forward. 

2352 # For example, starting with a production "N : . A B C", we run it forward 

2353 # to obtain "N : A B C ." We then build a relationship between this final 

2354 # state and the starting state. These relationships are stored in a dictionary 

2355 # lookdict. 

2356 # 

2357 # INCLUDES: 

2358 # 

2359 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 

2360 # 

2361 # This relation is used to determine non-terminal transitions that occur 

2362 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 

2363 # if the following holds: 

2364 # 

2365 # B -> L A T, where T -> epsilon and p' -L-> p 

2366 # 

2367 # L is essentially a prefix (which may be empty), T is a suffix that must be 

2368 # able to derive an empty string. State p' must lead to state p with the string L. 

2369 # 

2370 # ----------------------------------------------------------------------------- 

2371 

2372 def compute_lookback_includes(self, C, trans, nullable): 

2373 lookdict = {} # Dictionary of lookback relations 

2374 includedict = {} # Dictionary of include relations 

2375 

2376 # Make a dictionary of non-terminal transitions 

2377 dtrans = {} 

2378 for t in trans: 

2379 dtrans[t] = 1 

2380 

2381 # Loop over all transitions and compute lookbacks and includes 

2382 for state, N in trans: 

2383 lookb = [] 

2384 includes = [] 

2385 for p in C[state]: 

2386 if p.name != N: 

2387 continue 

2388 

2389 # Okay, we have a name match. We now follow the production all the way 

2390 # through the state machine until we get the . on the right hand side 

2391 

2392 lr_index = p.lr_index 

2393 j = state 

2394 while lr_index < p.len - 1: 

2395 lr_index = lr_index + 1 

2396 t = p.prod[lr_index] 

2397 

2398 # Check to see if this symbol and state are a non-terminal transition 

2399 if (j, t) in dtrans: 

2400 # Yes. Okay, there is some chance that this is an includes relation 

2401 # the only way to know for certain is whether the rest of the 

2402 # production derives empty 

2403 

2404 li = lr_index + 1 

2405 while li < p.len: 

2406 if p.prod[li] in self.grammar.Terminals: 

2407 break # No, forget it: a terminal here can never derive empty 

2408 if p.prod[li] not in nullable: 

2409 break 

2410 li = li + 1 

2411 else: 

2412 # Appears to be a relation between (j,t) and (state,N) 

2413 includes.append((j, t)) 

2414 

2415 g = self.lr0_goto(C[j], t) # Go to next set 

2416 j = self.lr0_cidhash.get(id(g), -1) # Go to next state 

2417 

2418 # When we get here, j is the final state, now we have to locate the production 

2419 for r in C[j]: 

2420 if r.name != p.name: 

2421 continue 

2422 if r.len != p.len: 

2423 continue 

2424 i = 0 

2425 # This loop is comparing a production ". A B C" with "A B C ." 

2426 while i < r.lr_index: 

2427 if r.prod[i] != p.prod[i+1]: 

2428 break 

2429 i = i + 1 

2430 else: 

2431 lookb.append((j, r)) 

2432 for i in includes: 

2433 if i not in includedict: 

2434 includedict[i] = [] 

2435 includedict[i].append((state, N)) 

2436 lookdict[(state, N)] = lookb 

2437 

2438 return lookdict, includedict 

2439 

2440 # ----------------------------------------------------------------------------- 

2441 # compute_read_sets() 

2442 # 

2443 # Given a set of LR(0) items, this function computes the read sets. 

2444 # 

2445 # Inputs: C = Set of LR(0) items 

2446 # ntrans = Set of nonterminal transitions 

2447 # nullable = Set of empty transitions 

2448 # 

2449 # Returns a set containing the read sets 

2450 # ----------------------------------------------------------------------------- 
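# In equation form (a sketch of the DeRemer/Pennello setup):
#
#     Read(p,A) = DR(p,A) U U{Read(t,C) | (p,A) READS (t,C)}
#
# where dr_relation() supplies the base function DR (passed as FP) and
# reads_relation() supplies the READS edges (passed as R) to digraph().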

2451 

2452 def compute_read_sets(self, C, ntrans, nullable): 

2453 FP = lambda x: self.dr_relation(C, x, nullable) 

2454 R = lambda x: self.reads_relation(C, x, nullable) 

2455 F = digraph(ntrans, R, FP) 

2456 return F 

2457 

2458 # ----------------------------------------------------------------------------- 

2459 # compute_follow_sets() 

2460 # 

2461 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 

2462 # and an include set, this function computes the follow sets 

2463 # 

2464 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 

2465 # 

2466 # Inputs: 

2467 # ntrans = Set of nonterminal transitions 

2468 # readsets = Readset (previously computed) 

2469 # inclsets = Include sets (previously computed) 

2470 # 

2471 # Returns a set containing the follow sets 

2472 # ----------------------------------------------------------------------------- 

2473 

2474 def compute_follow_sets(self, ntrans, readsets, inclsets): 

2475 FP = lambda x: readsets[x] 

2476 R = lambda x: inclsets.get(x, []) 

2477 F = digraph(ntrans, R, FP) 

2478 return F 

2479 

2480 # ----------------------------------------------------------------------------- 

2481 # add_lookaheads() 

2482 # 

2483 # Attaches the lookahead symbols to grammar rules. 

2484 # 

2485 # Inputs: lookbacks - Set of lookback relations 

2486 # followset - Computed follow set 

2487 # 

2488 # This function directly attaches the lookaheads to productions contained 

2489 # in the lookbacks set 

2490 # ----------------------------------------------------------------------------- 

2491 

2492 def add_lookaheads(self, lookbacks, followset): 

2493 for trans, lb in lookbacks.items(): 

2494 # Loop over productions in lookback 

2495 for state, p in lb: 

2496 if state not in p.lookaheads: 

2497 p.lookaheads[state] = [] 

2498 f = followset.get(trans, []) 

2499 for a in f: 

2500 if a not in p.lookaheads[state]: 

2501 p.lookaheads[state].append(a) 

2502 

2503 # ----------------------------------------------------------------------------- 

2504 # add_lalr_lookaheads() 

2505 # 

2506 # This function does all of the work of adding lookahead information for use 

2507 # with LALR parsing 

2508 # ----------------------------------------------------------------------------- 

2509 

2510 def add_lalr_lookaheads(self, C): 

2511 # Determine all of the nullable nonterminals 

2512 nullable = self.compute_nullable_nonterminals() 

2513 

2514 # Find all non-terminal transitions 

2515 trans = self.find_nonterminal_transitions(C) 

2516 

2517 # Compute read sets 

2518 readsets = self.compute_read_sets(C, trans, nullable) 

2519 

2520 # Compute lookback/includes relations 

2521 lookd, included = self.compute_lookback_includes(C, trans, nullable) 

2522 

2523 # Compute LALR FOLLOW sets 

2524 followsets = self.compute_follow_sets(trans, readsets, included) 

2525 

2526 # Add all of the lookaheads 

2527 self.add_lookaheads(lookd, followsets) 

2528 

2529 # ----------------------------------------------------------------------------- 

2530 # lr_parse_table() 

2531 # 

2532 # This function constructs the parse tables for SLR or LALR 

2533 # ----------------------------------------------------------------------------- 

2534 def lr_parse_table(self): 

2535 Productions = self.grammar.Productions 

2536 Precedence = self.grammar.Precedence 

2537 goto = self.lr_goto # Goto array 

2538 action = self.lr_action # Action array 

2539 log = self.log # Logger for output 

2540 

2541 actionp = {} # Action production array (temporary) 

2542 

2543 log.info('Parsing method: %s', self.lr_method) 

2544 

2545 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 

2546 # This determines the number of states 

2547 

2548 C = self.lr0_items() 

2549 

2550 if self.lr_method == 'LALR': 

2551 self.add_lalr_lookaheads(C) 

2552 

2553 # Build the parser table, state by state 

2554 st = 0 

2555 for I in C: 

2556 # Loop over each production in I 

2557 actlist = [] # List of actions 

2558 st_action = {} 

2559 st_actionp = {} 

2560 st_goto = {} 

2561 log.info('') 

2562 log.info('state %d', st) 

2563 log.info('') 

2564 for p in I: 

2565 log.info(' (%d) %s', p.number, p) 

2566 log.info('') 

2567 

2568 for p in I: 

2569 if p.len == p.lr_index + 1: 

2570 if p.name == "S'": 

2571 # Start symbol. Accept! 

2572 st_action['$end'] = 0 

2573 st_actionp['$end'] = p 

2574 else: 

2575 # We are at the end of a production. Reduce! 

2576 if self.lr_method == 'LALR': 

2577 laheads = p.lookaheads[st] 

2578 else: 

2579 laheads = self.grammar.Follow[p.name] 

2580 for a in laheads: 

2581 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) 

2582 r = st_action.get(a) 

2583 if r is not None: 

2584 # Whoa. Have a shift/reduce or reduce/reduce conflict 

2585 if r > 0: 

2586 # Need to decide on shift or reduce here 

2587 # By default we favor shifting. Need to add 

2588 # some precedence rules here. 

2589 

2590 # Shift precedence comes from the token 

2591 sprec, slevel = Precedence.get(a, ('right', 0)) 

2592 

2593 # Reduce precedence comes from rule being reduced (p) 

2594 rprec, rlevel = Productions[p.number].prec 

2595 

2596 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 

2597 # We really need to reduce here. 

2598 st_action[a] = -p.number 

2599 st_actionp[a] = p 

2600 if not slevel and not rlevel: 

2601 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 

2602 self.sr_conflicts.append((st, a, 'reduce')) 

2603 Productions[p.number].reduced += 1 

2604 elif (slevel == rlevel) and (rprec == 'nonassoc'): 

2605 st_action[a] = None 

2606 else: 

2607 # Hmmm. Guess we'll keep the shift 

2608 if not rlevel: 

2609 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 

2610 self.sr_conflicts.append((st, a, 'shift')) 

2611 elif r < 0: 

2612 # Reduce/reduce conflict. In this case, we favor the rule 

2613 # that was defined first in the grammar file 

2614 oldp = Productions[-r] 

2615 pp = Productions[p.number] 

2616 if oldp.line > pp.line: 

2617 st_action[a] = -p.number 

2618 st_actionp[a] = p 

2619 chosenp, rejectp = pp, oldp 

2620 Productions[p.number].reduced += 1 

2621 Productions[oldp.number].reduced -= 1 

2622 else: 

2623 chosenp, rejectp = oldp, pp 

2624 self.rr_conflicts.append((st, chosenp, rejectp)) 

2625 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', 

2626 a, st_actionp[a].number, st_actionp[a]) 

2627 else: 

2628 raise LALRError('Unknown conflict in state %d' % st) 

2629 else: 

2630 st_action[a] = -p.number 

2631 st_actionp[a] = p 

2632 Productions[p.number].reduced += 1 

2633 else: 

2634 i = p.lr_index 

2635 a = p.prod[i+1] # Get symbol right after the "." 

2636 if a in self.grammar.Terminals: 

2637 g = self.lr0_goto(I, a) 

2638 j = self.lr0_cidhash.get(id(g), -1) 

2639 if j >= 0: 

2640 # We are in a shift state 

2641 actlist.append((a, p, 'shift and go to state %d' % j)) 

2642 r = st_action.get(a) 

2643 if r is not None: 

2644 # Whoa. Have a shift/reduce or shift/shift conflict 

2645 if r > 0: 

2646 if r != j: 

2647 raise LALRError('Shift/shift conflict in state %d' % st) 

2648 elif r < 0: 

2649 # Do a precedence check. 

2650 # - if precedence of reduce rule is higher, we reduce. 

2651 # - if precedence of reduce is same and left assoc, we reduce. 

2652 # - otherwise we shift 

2653 

2654 # Shift precedence comes from the token 

2655 sprec, slevel = Precedence.get(a, ('right', 0)) 

2656 

2657 # Reduce precedence comes from the rule that could have been reduced 

2658 rprec, rlevel = Productions[st_actionp[a].number].prec 

2659 

2660 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 

2661 # We decide to shift here... highest precedence to shift 

2662 Productions[st_actionp[a].number].reduced -= 1 

2663 st_action[a] = j 

2664 st_actionp[a] = p 

2665 if not rlevel: 

2666 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 

2667 self.sr_conflicts.append((st, a, 'shift')) 

2668 elif (slevel == rlevel) and (rprec == 'nonassoc'): 

2669 st_action[a] = None 

2670 else: 

2671 # Hmmm. Guess we'll keep the reduce 

2672 if not slevel and not rlevel: 

2673 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 

2674 self.sr_conflicts.append((st, a, 'reduce')) 

2675 

2676 else: 

2677 raise LALRError('Unknown conflict in state %d' % st) 

2678 else: 

2679 st_action[a] = j 

2680 st_actionp[a] = p 

2681 

2682 # Print the actions associated with each terminal 

2683 _actprint = {} 

2684 for a, p, m in actlist: 

2685 if a in st_action: 

2686 if p is st_actionp[a]: 

2687 log.info(' %-15s %s', a, m) 

2688 _actprint[(a, m)] = 1 

2689 log.info('') 

2690 # Print the actions that were not used. (debugging) 

2691 not_used = 0 

2692 for a, p, m in actlist: 

2693 if a in st_action: 

2694 if p is not st_actionp[a]: 

2695 if not (a, m) in _actprint: 

2696 log.debug(' ! %-15s [ %s ]', a, m) 

2697 not_used = 1 

2698 _actprint[(a, m)] = 1 

2699 if not_used: 

2700 log.debug('') 

2701 

2702 # Construct the goto table for this state 

2703 

2704 nkeys = {} 

2705 for ii in I: 

2706 for s in ii.usyms: 

2707 if s in self.grammar.Nonterminals: 

2708 nkeys[s] = None 

2709 for n in nkeys: 

2710 g = self.lr0_goto(I, n) 

2711 j = self.lr0_cidhash.get(id(g), -1) 

2712 if j >= 0: 

2713 st_goto[n] = j 

2714 log.info(' %-30s shift and go to state %d', n, j) 

2715 

2716 action[st] = st_action 

2717 actionp[st] = st_actionp 

2718 goto[st] = st_goto 

2719 st += 1 

2720 

2721 # ----------------------------------------------------------------------------- 

2722 # write() 

2723 # 

2724 # This function writes the LR parsing tables to a file 

2725 # ----------------------------------------------------------------------------- 
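# The generated module starts roughly like this (an illustrative sketch;
# actual values depend on the grammar, signature, and __tabversion__):
#
#     # parsetab.py
#     # This file is automatically generated. Do not edit.
#     # pylint: disable=W,C,R
#     _tabversion = '3.10'
#     _lr_method = 'LALR'
#     _lr_signature = '...'
#     _lr_action_items = {'NUM': ([0, 3], [2, 2]), ...}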

2726 

2727 def write_table(self, tabmodule, outputdir='', signature=''): 

2728 if isinstance(tabmodule, types.ModuleType): 

2729 raise IOError("Won't overwrite existing tabmodule") 

2730 

2731 basemodulename = tabmodule.split('.')[-1] 

2732 filename = os.path.join(outputdir, basemodulename) + '.py' 

2733 try: 

2734 f = open(filename, 'w') 

2735 

2736 f.write(''' 

2737# %s 

2738# This file is automatically generated. Do not edit. 

2739# pylint: disable=W,C,R 

2740_tabversion = %r 

2741 

2742_lr_method = %r 

2743 

2744_lr_signature = %r 

2745 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) 

2746 

2747 # Change smaller to 0 to go back to original tables 

2748 smaller = 1 

2749 

2750 # Factor out names to try and make smaller 

2751 if smaller: 

2752 items = {} 

2753 

2754 for s, nd in self.lr_action.items(): 

2755 for name, v in nd.items(): 

2756 i = items.get(name) 

2757 if not i: 

2758 i = ([], []) 

2759 items[name] = i 

2760 i[0].append(s) 

2761 i[1].append(v) 

2762 

2763 f.write('\n_lr_action_items = {') 

2764 for k, v in items.items(): 

2765 f.write('%r:([' % k) 

2766 for i in v[0]: 

2767 f.write('%r,' % i) 

2768 f.write('],[') 

2769 for i in v[1]: 

2770 f.write('%r,' % i) 

2771 

2772 f.write(']),') 

2773 f.write('}\n') 

2774 

2775 f.write(''' 

2776_lr_action = {} 

2777for _k, _v in _lr_action_items.items(): 

2778 for _x,_y in zip(_v[0],_v[1]): 

2779 if not _x in _lr_action: _lr_action[_x] = {} 

2780 _lr_action[_x][_k] = _y 

2781del _lr_action_items 

2782''') 

2783 

2784 else: 

2785 f.write('\n_lr_action = { ') 

2786 for k, v in self.lr_action.items(): 

2787 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 

2788 f.write('}\n') 

2789 

2790 if smaller: 

2791 # Factor out names to try and make smaller 

2792 items = {} 

2793 

2794 for s, nd in self.lr_goto.items(): 

2795 for name, v in nd.items(): 

2796 i = items.get(name) 

2797 if not i: 

2798 i = ([], []) 

2799 items[name] = i 

2800 i[0].append(s) 

2801 i[1].append(v) 

2802 

2803 f.write('\n_lr_goto_items = {') 

2804 for k, v in items.items(): 

2805 f.write('%r:([' % k) 

2806 for i in v[0]: 

2807 f.write('%r,' % i) 

2808 f.write('],[') 

2809 for i in v[1]: 

2810 f.write('%r,' % i) 

2811 

2812 f.write(']),') 

2813 f.write('}\n') 

2814 

2815 f.write(''' 

2816_lr_goto = {} 

2817for _k, _v in _lr_goto_items.items(): 

2818 for _x, _y in zip(_v[0], _v[1]): 

2819 if not _x in _lr_goto: _lr_goto[_x] = {} 

2820 _lr_goto[_x][_k] = _y 

2821del _lr_goto_items 

2822''') 

2823 else: 

2824 f.write('\n_lr_goto = { ') 

2825 for k, v in self.lr_goto.items(): 

2826 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 

2827 f.write('}\n') 

2828 

2829 # Write production table 

2830 f.write('_lr_productions = [\n') 

2831 for p in self.lr_productions: 

2832 if p.func: 

2833 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, 

2834 p.func, os.path.basename(p.file), p.line)) 

2835 else: 

2836 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) 

2837 f.write(']\n') 

2838 f.close() 

2839 

2840 except IOError as e: 

2841 raise 

2842 

2843 

2844 # ----------------------------------------------------------------------------- 

2845 # pickle_table() 

2846 # 

2847 # This function pickles the LR parsing tables to a supplied file object 

2848 # ----------------------------------------------------------------------------- 
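# The records are dumped in exactly the order read_pickle() expects:
# version, method, signature, action table, goto table, productions.
# A minimal round trip (hypothetical filename):
#
#     table.pickle_table('parser.pickle', signature='sig')
#     lr = LRTable()
#     assert lr.read_pickle('parser.pickle') == 'sig'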

2849 

2850 def pickle_table(self, filename, signature=''): 

2851 try: 

2852 import cPickle as pickle 

2853 except ImportError: 

2854 import pickle 

2855 with open(filename, 'wb') as outf: 

2856 pickle.dump(__tabversion__, outf, pickle_protocol) 

2857 pickle.dump(self.lr_method, outf, pickle_protocol) 

2858 pickle.dump(signature, outf, pickle_protocol) 

2859 pickle.dump(self.lr_action, outf, pickle_protocol) 

2860 pickle.dump(self.lr_goto, outf, pickle_protocol) 

2861 

2862 outp = [] 

2863 for p in self.lr_productions: 

2864 if p.func: 

2865 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) 

2866 else: 

2867 outp.append((str(p), p.name, p.len, None, None, None)) 

2868 pickle.dump(outp, outf, pickle_protocol) 

2869 

2870# ----------------------------------------------------------------------------- 

2871# === INTROSPECTION === 

2872# 

2873# The following functions and classes are used to implement the PLY 

2874# introspection features followed by the yacc() function itself. 

2875# ----------------------------------------------------------------------------- 

2876 

2877# ----------------------------------------------------------------------------- 

2878# get_caller_module_dict() 

2879# 

2880# This function returns a dictionary containing all of the symbols defined within 

2881# a caller further down the call stack. This is used to get the environment 

2882# associated with the yacc() call if none was provided. 

2883# ----------------------------------------------------------------------------- 

2884 

2885def get_caller_module_dict(levels): 

2886 f = sys._getframe(levels) 

2887 ldict = f.f_globals.copy() 

2888 if f.f_globals != f.f_locals: 

2889 ldict.update(f.f_locals) 

2890 return ldict 

2891 

2892# ----------------------------------------------------------------------------- 

2893# parse_grammar() 

2894# 

2895# This takes a raw grammar rule string and parses it into production data 

2896# ----------------------------------------------------------------------------- 

2897def parse_grammar(doc, file, line): 

2898 grammar = [] 

2899 # Split the doc string into lines 

2900 pstrings = doc.splitlines() 

2901 lastp = None 

2902 dline = line 

2903 for ps in pstrings: 

2904 dline += 1 

2905 p = ps.split() 

2906 if not p: 

2907 continue 

2908 try: 

2909 if p[0] == '|': 

2910 # This is a continuation of a previous rule 

2911 if not lastp: 

2912 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 

2913 prodname = lastp 

2914 syms = p[1:] 

2915 else: 

2916 prodname = p[0] 

2917 lastp = prodname 

2918 syms = p[2:] 

2919 assign = p[1] 

2920 if assign != ':' and assign != '::=': 

2921 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) 

2922 

2923 grammar.append((file, dline, prodname, syms)) 

2924 except SyntaxError: 

2925 raise 

2926 except Exception: 

2927 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 

2928 

2929 return grammar 

2930 

2931# ----------------------------------------------------------------------------- 

2932# ParserReflect() 

2933# 

2934# This class represents information extracted for building a parser including 

2935# start symbol, error function, tokens, precedence list, action functions, 

2936# etc. 

2937# ----------------------------------------------------------------------------- 

2938class ParserReflect(object): 

2939 def __init__(self, pdict, log=None): 

2940 self.pdict = pdict 

2941 self.start = None 

2942 self.error_func = None 

2943 self.tokens = None 

2944 self.modules = set() 

2945 self.grammar = [] 

2946 self.error = False 

2947 

2948 if log is None: 

2949 self.log = PlyLogger(sys.stderr) 

2950 else: 

2951 self.log = log 

2952 

2953 # Get all of the basic information 

2954 def get_all(self): 

2955 self.get_start() 

2956 self.get_error_func() 

2957 self.get_tokens() 

2958 self.get_precedence() 

2959 self.get_pfunctions() 

2960 

2961 # Validate all of the information 

2962 def validate_all(self): 

2963 self.validate_start() 

2964 self.validate_error_func() 

2965 self.validate_tokens() 

2966 self.validate_precedence() 

2967 self.validate_pfunctions() 

2968 self.validate_modules() 

2969 return self.error 

2970 

2971 # Compute a signature over the grammar 

2972 def signature(self): 

2973 parts = [] 

2974 try: 

2975 if self.start: 

2976 parts.append(self.start) 

2977 if self.prec: 

2978 parts.append(''.join([''.join(p) for p in self.prec])) 

2979 if self.tokens: 

2980 parts.append(' '.join(self.tokens)) 

2981 for f in self.pfuncs: 

2982 if f[3]: 

2983 parts.append(f[3]) 

2984 except (TypeError, ValueError): 

2985 pass 

2986 return ''.join(parts) 

2987 

2988 # ----------------------------------------------------------------------------- 

2989 # validate_modules() 

2990 # 

2991 # This method checks to see if there are duplicated p_rulename() functions 

2992 # in the parser module file. Without this function, it is really easy for 

2993 # users to make mistakes by cutting and pasting code fragments (and it's a real 

2994 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 

2995 # we just do a little regular expression pattern matching of def statements 

2996 # to try and detect duplicates. 

2997 # ----------------------------------------------------------------------------- 

2998 

2999 def validate_modules(self): 

3000 # Match def p_funcname( 

3001 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 

3002 

3003 for module in self.modules: 

3004 try: 

3005 lines, linen = inspect.getsourcelines(module) 

3006 except IOError: 

3007 continue 

3008 

3009 counthash = {} 

3010 for linen, line in enumerate(lines): 

3011 linen += 1 

3012 m = fre.match(line) 

3013 if m: 

3014 name = m.group(1) 

3015 prev = counthash.get(name) 

3016 if not prev: 

3017 counthash[name] = linen 

3018 else: 

3019 filename = inspect.getsourcefile(module) 

3020 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', 

3021 filename, linen, name, prev) 

3022 

3023 # Get the start symbol 

3024 def get_start(self): 

3025 self.start = self.pdict.get('start') 

3026 

3027 # Validate the start symbol 

3028 def validate_start(self): 

3029 if self.start is not None: 

3030 if not isinstance(self.start, string_types): 

3031 self.log.error("'start' must be a string") 

3032 

3033 # Look for error handler 

3034 def get_error_func(self): 

3035 self.error_func = self.pdict.get('p_error') 

3036 

3037 # Validate the error function 

3038 def validate_error_func(self): 

3039 if self.error_func: 

3040 if isinstance(self.error_func, types.FunctionType): 

3041 ismethod = 0 

3042 elif isinstance(self.error_func, types.MethodType): 

3043 ismethod = 1 

3044 else: 

3045 self.log.error("'p_error' defined, but is not a function or method") 

3046 self.error = True 

3047 return 

3048 

3049 eline = self.error_func.__code__.co_firstlineno 

3050 efile = self.error_func.__code__.co_filename 

3051 module = inspect.getmodule(self.error_func) 

3052 self.modules.add(module) 

3053 

3054 argcount = self.error_func.__code__.co_argcount - ismethod 

3055 if argcount != 1: 

3056 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) 

3057 self.error = True 

3058 

3059 # Get the tokens map 

3060 def get_tokens(self): 

3061 tokens = self.pdict.get('tokens') 

3062 if not tokens: 

3063 self.log.error('No token list is defined') 

3064 self.error = True 

3065 return 

3066 

3067 if not isinstance(tokens, (list, tuple)): 

3068 self.log.error('tokens must be a list or tuple') 

3069 self.error = True 

3070 return 

3071 

3072 if not tokens: 

3073 self.log.error('tokens is empty') 

3074 self.error = True 

3075 return 

3076 

3077 self.tokens = sorted(tokens) 

3078 

3079 # Validate the tokens 

3080 def validate_tokens(self): 

3081 # Validate the tokens. 

3082 if 'error' in self.tokens: 

3083 self.log.error("Illegal token name 'error'. Is a reserved word") 

3084 self.error = True 

3085 return 

3086 

3087 terminals = set() 

3088 for n in self.tokens: 

3089 if n in terminals: 

3090 self.log.warning('Token %r multiply defined', n) 

3091 terminals.add(n) 

3092 

3093 # Get the precedence map (if any) 

3094 def get_precedence(self): 

3095 self.prec = self.pdict.get('precedence') 

3096 

3097 # Validate and parse the precedence map 

3098 def validate_precedence(self): 

3099 preclist = [] 

3100 if self.prec: 

3101 if not isinstance(self.prec, (list, tuple)): 

3102 self.log.error('precedence must be a list or tuple') 

3103 self.error = True 

3104 return 

3105 for level, p in enumerate(self.prec): 

3106 if not isinstance(p, (list, tuple)): 

3107 self.log.error('Bad precedence table') 

3108 self.error = True 

3109 return 

3110 

3111 if len(p) < 2: 

3112 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) 

3113 self.error = True 

3114 return 

3115 assoc = p[0] 

3116 if not isinstance(assoc, string_types): 

3117 self.log.error('precedence associativity must be a string') 

3118 self.error = True 

3119 return 

3120 for term in p[1:]: 

3121 if not isinstance(term, string_types): 

3122 self.log.error('precedence items must be strings') 

3123 self.error = True 

3124 return 

3125 preclist.append((term, assoc, level+1)) 

3126 self.preclist = preclist 

3127 

3128 # Get all p_functions from the grammar 

3129 def get_pfunctions(self): 

3130 p_functions = [] 

3131 for name, item in self.pdict.items(): 

3132 if not name.startswith('p_') or name == 'p_error': 

3133 continue 

3134 if isinstance(item, (types.FunctionType, types.MethodType)): 

3135 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) 

3136 module = inspect.getmodule(item) 

3137 p_functions.append((line, module, name, item.__doc__)) 

3138 

3139 # Sort all of the actions by line number; make sure to stringify 

3140 # modules to make them sortable, since `line` may not uniquely sort all 

3141 # p functions 

3142 p_functions.sort(key=lambda p_function: ( 

3143 p_function[0], 

3144 str(p_function[1]), 

3145 p_function[2], 

3146 p_function[3])) 

3147 self.pfuncs = p_functions 

3148 

3149 # Validate all of the p_functions 

3150 def validate_pfunctions(self): 

3151 grammar = [] 

3152 # Check for non-empty symbols 

3153 if len(self.pfuncs) == 0: 

3154 self.log.error('no rules of the form p_rulename are defined') 

3155 self.error = True 

3156 return 

3157 

3158 for line, module, name, doc in self.pfuncs: 

3159 file = inspect.getsourcefile(module) 

3160 func = self.pdict[name] 

3161 if isinstance(func, types.MethodType): 

3162 reqargs = 2 

3163 else: 

3164 reqargs = 1 

3165 if func.__code__.co_argcount > reqargs: 

3166 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) 

3167 self.error = True 

3168 elif func.__code__.co_argcount < reqargs: 

3169 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) 

3170 self.error = True 

3171 elif not func.__doc__: 

3172 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', 

3173 file, line, func.__name__) 

3174 else: 

3175 try: 

3176 parsed_g = parse_grammar(doc, file, line) 

3177 for g in parsed_g: 

3178 grammar.append((name, g)) 

3179 except SyntaxError as e: 

3180 self.log.error(str(e)) 

3181 self.error = True 

3182 

3183 # Looks like a valid grammar rule 

3184 # Mark the file in which defined. 

3185 self.modules.add(module) 

3186 

3187 # Secondary validation step that looks for p_ definitions that are not functions 

3188 # or functions that look like they might be grammar rules. 

3189 

3190 for n, v in self.pdict.items(): 

3191 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): 

3192 continue 

3193 if n.startswith('t_'): 

3194 continue 

3195 if n.startswith('p_') and n != 'p_error': 

3196 self.log.warning('%r not defined as a function', n) 

3197 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or 

3198 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): 

3199 if v.__doc__: 

3200 try: 

3201 doc = v.__doc__.split(' ') 

3202 if doc[1] == ':': 

3203 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', 

3204 v.__code__.co_filename, v.__code__.co_firstlineno, n) 

3205 except IndexError: 

3206 pass 

3207 

3208 self.grammar = grammar 

3209 

3210# ----------------------------------------------------------------------------- 

3211# yacc(module) 

3212# 

3213# Build a parser 

3214# ----------------------------------------------------------------------------- 

3215 

3216def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 

3217 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, 

3218 outputdir=None, debuglog=None, errorlog=None, picklefile=None): 

3219 

3220 if tabmodule is None: 

3221 tabmodule = tab_module 

3222 

3223 # Reference to the parsing method of the last built parser 

3224 global parse 

3225 

3226 # If pickling is enabled, table files are not created 

3227 if picklefile: 

3228 write_tables = 0 

3229 

3230 if errorlog is None: 

3231 errorlog = PlyLogger(sys.stderr) 

3232 

3233 # Get the module dictionary used for the parser 

3234 if module: 

3235 _items = [(k, getattr(module, k)) for k in dir(module)] 

3236 pdict = dict(_items) 

3237 # If no __file__ or __package__ attributes are available, try to obtain them 

3238 # from the __module__ instead 

3239 if '__file__' not in pdict: 

3240 pdict['__file__'] = sys.modules[pdict['__module__']].__file__ 

3241 if '__package__' not in pdict and '__module__' in pdict: 

3242 if hasattr(sys.modules[pdict['__module__']], '__package__'): 

3243 pdict['__package__'] = sys.modules[pdict['__module__']].__package__ 

3244 else: 

3245 pdict = get_caller_module_dict(2) 

3246 

3247 if outputdir is None: 

3248 # If no output directory is set, the location of the output files 

3249 # is determined according to the following rules: 

3250 # - If tabmodule specifies a package, files go into that package directory 

3251 # - Otherwise, files go in the same directory as the specifying module 

3252 if isinstance(tabmodule, types.ModuleType): 

3253 srcfile = tabmodule.__file__ 

3254 else: 

3255 if '.' not in tabmodule: 

3256 srcfile = pdict['__file__'] 

3257 else: 

3258 parts = tabmodule.split('.') 

3259 pkgname = '.'.join(parts[:-1]) 

3260 exec('import %s' % pkgname) 

3261 srcfile = getattr(sys.modules[pkgname], '__file__', '') 

3262 outputdir = os.path.dirname(srcfile) 

3263 

3264 # Determine if the module is part of a package or not. 

3265 # If so, fix the tabmodule setting so that tables load correctly 

3266 pkg = pdict.get('__package__') 

3267 if pkg and isinstance(tabmodule, str): 

3268 if '.' not in tabmodule: 

3269 tabmodule = pkg + '.' + tabmodule 

3270 

3271 

3272 

3273 # Set start symbol if it's specified directly using an argument 

3274 if start is not None: 

3275 pdict['start'] = start 

3276 

3277 # Collect parser information from the dictionary 

3278 pinfo = ParserReflect(pdict, log=errorlog) 

3279 pinfo.get_all() 

3280 

3281 if pinfo.error: 

3282 raise YaccError('Unable to build parser') 

3283 

3284 # Check signature against table files (if any) 

3285 signature = pinfo.signature() 

3286 

3287 # Read the tables 

3288 try: 

3289 lr = LRTable() 

3290 if picklefile: 

3291 read_signature = lr.read_pickle(picklefile) 

3292 else: 

3293 read_signature = lr.read_table(tabmodule) 

3294 if optimize or (read_signature == signature): 

3295 try: 

3296 lr.bind_callables(pinfo.pdict) 

3297 parser = LRParser(lr, pinfo.error_func) 

3298 parse = parser.parse 

3299 return parser 

3300 except Exception as e: 

3301 errorlog.warning('There was a problem loading the table file: %r', e) 

3302 except VersionError as e: 

3303 errorlog.warning(str(e)) 

3304 except ImportError: 

3305 pass 

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True
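
    # (Each entry in pinfo.grammar was derived from the docstring of a p_*
    # rule function, so errors reported here carry the file and line of the
    # offending function.)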

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)
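
    # LRGeneratedTable is the expensive step: it runs the full SLR or
    # LALR(1) construction over the grammar assembled above and produces
    # the action/goto tables used by the runtime parser.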

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)

        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)
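
        # A rule reported as never reduced ended up with no reduce action
        # anywhere in the generated tables (it lost every conflict), so it
        # can never fire at parse time.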

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
            if tabmodule in sys.modules:
                del sys.modules[tabmodule]
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
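    # (Removing tabmodule from sys.modules above ensures that a previously
    # imported, now-stale copy of the tables cannot shadow the file just
    # written on a subsequent import.)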

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser
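
# -----------------------------------------------------------------------------
# Usage sketch
#
# A minimal, self-contained grammar module illustrating how the yacc() entry
# point above is typically driven together with ply.lex. The module layout,
# token set, and rule names below are illustrative assumptions, not part of
# this file; only the yacc()/parse() calls mirror the API defined above.
# -----------------------------------------------------------------------------
#
#     import ply.lex as lex
#     import ply.yacc as yacc
#
#     tokens = ('NUMBER', 'PLUS', 'TIMES')
#
#     t_PLUS = r'\+'
#     t_TIMES = r'\*'
#     t_ignore = ' \t'
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         t.lexer.skip(1)
#
#     # TIMES binds tighter than PLUS; both are left-associative
#     precedence = (
#         ('left', 'PLUS'),
#         ('left', 'TIMES'),
#     )
#
#     def p_expr_binop(p):
#         '''expr : expr PLUS expr
#                 | expr TIMES expr'''
#         p[0] = p[1] + p[3] if p[2] == '+' else p[1] * p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print('Syntax error at', p)
#
#     lexer = lex.lex()
#     parser = yacc.yacc(debug=False, write_tables=False)
#     print(parser.parse('2+3*4'))    # prints 14
#
# Passing write_tables=False skips the table-file write step above, and
# debug=False keeps the NullLogger path, which suits throwaway parsers.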