# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------
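
# For orientation, a minimal sketch (illustrative only, not part of this
# module) of how a grammar is supplied to PLY as docstrings. The token names
# and the lexer module 'calclex' are assumptions made for the example:
#
#     import ply.yacc as yacc
#     from calclex import tokens        # hypothetical lexer module
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expr_term(p):
#         'expr : term'
#         p[0] = p[1]
#
#     def p_term_number(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     parser = yacc.yacc()
#     result = parser.parse('3 + 4')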

import re
import types
import sys
import os.path
import inspect
import warnings

__version__ = '3.11'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode. If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc. This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug
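
# A brief usage sketch (illustrative, not part of this module):
#
#     log = PlyLogger(sys.stderr)
#     log.warning('conflict in state %d', 7)   # -> "WARNING: conflict in state 7"
#     log.info('writing %s', 'parser.out')     # info is an alias for debug
#
# In PLY 3.x a logger like this can also be handed to yacc.yacc() through its
# debuglog/errorlog arguments (an assumption of this sketch).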

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support. This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r

#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#     .type       = Grammar symbol type
#     .value      = Symbol value
#     .lineno     = Starting line number
#     .endlineno  = Ending line number (optional, set automatically)
#     .lexpos     = Starting lex position
#     .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.
# An illustrative usage sketch follows the class definition below.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
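
# Illustrative sketch (the rule and token names are assumptions, not part of
# this module): inside a grammar rule, the YaccProduction instance p provides
# indexed access to symbol values plus the position helpers above:
#
#     def p_stmt_assign(p):
#         'stmt : NAME EQUALS expr'
#         p[0] = ('assign', p[1], p[3])   # indexing reads/writes .value
#         line = p.lineno(1)              # line number of NAME
#         start, end = p.lexspan(3)       # lex positions spanned by expr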

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
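
    # A hedged sketch of the resulting table (illustrative, not part of this
    # module): defaulted_states maps a state number to its single reduction
    # action, e.g. {5: -12} means "in state 5, always reduce by production 12
    # without reading a lookahead". Callers who need the lexer consulted at
    # every step can opt out:
    #
    #     parser = yacc.yacc()
    #     parser.disable_defaulted_states()   # always fetch the next token first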

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse(). All changes made to the
    # parsing engine should be made here. Optimized versions of this function
    # are automatically created by the ply/ygen.py script. This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '[' + ','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + ']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
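                #
                # An illustrative sketch (an assumed grammar rule, not part of
                # this module) of a synchronization rule that catches the
                # 'error' token and resumes parsing at the next SEMI:
                #
                #     def p_stmt_error(p):
                #         'stmt : error SEMI'
                #         print('discarded bad statement on line', p.lineno(2))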
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-notrack-end

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr', 'PLUS', 'term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal stand-in for Production objects when
# reading table data from files. It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here are
# the basic attributes:
#
#       name       - Name of the production. For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#
#       lr_next    - Next LR item. Example: if we are 'expr -> expr . PLUS term',
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'
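
# An illustrative walk (an assumed rule, not part of this module) through the
# LR items of one production, showing how the dot advances:
#
#     expr -> . expr PLUS term      # lr_index = 0
#     expr -> expr . PLUS term      # lr_index = 1
#     expr -> expr PLUS . term      # lr_index = 2
#     expr -> expr PLUS term .      # lr_index = 3 (ready to reduce)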

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
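
# A micro-example (illustrative, not part of this module), where PLUS is the
# only terminal in the symbol list:
#
#     rightmost_terminal(['expr', 'PLUS', 'term'], {'PLUS': []})   # -> 'PLUS'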

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions. The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right', level), ('nonassoc', level), or ('left', level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'. level is a numeric level.
    # -----------------------------------------------------------------------------
    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
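
    # A hedged usage sketch (the token names are assumptions for the example):
    #
    #     g = Grammar(['NUMBER', 'PLUS', 'TIMES'])
    #     g.set_precedence('PLUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)   # binds tighter than PLUS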

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr', 'PLUS', 'term'].
    #
    # Precedence is determined by the precedence of the rightmost terminal symbol
    # or by the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------
    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if len(c) > 1:
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
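
    # A hedged continuation of the sketch above (the rules are assumptions):
    #
    #     g.add_production('expr', ['expr', 'PLUS', 'expr'])
    #     g.add_production('expr', ['NUMBER'])
    #     g.set_start('expr')     # installs S' -> expr as production 0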

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------
    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol. Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------
    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------
    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
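
    # An illustrative (assumed) example of what this catches: a rule that can
    # never derive a string of only terminals, e.g.
    #
    #     a : a PLUS a
    #
    # with no terminating alternative for 'a'. Every production of 'a' contains
    # 'a' itself, so terminates['a'] never becomes True and infinite_cycles()
    # reports 'a'.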
1731 # -----------------------------------------------------------------------------
1732 # undefined_symbols()
1733 #
1734 # Find all symbols that were used the grammar, but not defined as tokens or
1735 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
1736 # and prod is the production where the symbol was used.
1737 # -----------------------------------------------------------------------------
1738 def undefined_symbols(self):
1739 result = []
1740 for p in self.Productions:
1741 if not p:
1742 continue
1744 for s in p.prod:
1745 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1746 result.append((s, p))
1747 return result
1749 # -----------------------------------------------------------------------------
1750 # unused_terminals()
1751 #
1752 # Find all terminals that were defined, but not used by the grammar. Returns
1753 # a list of all symbols.
1754 # -----------------------------------------------------------------------------
1755 def unused_terminals(self):
1756 unused_tok = []
1757 for s, v in self.Terminals.items():
1758 if s != 'error' and not v:
1759 unused_tok.append(s)
1761 return unused_tok
1763 # ------------------------------------------------------------------------------
1764 # unused_rules()
1765 #
1766 # Find all grammar rules that were defined, but not used (maybe not reachable)
1767 # Returns a list of productions.
1768 # ------------------------------------------------------------------------------
1770 def unused_rules(self):
1771 unused_prod = []
1772 for s, v in self.Nonterminals.items():
1773 if not v:
1774 p = self.Prodnames[s][0]
1775 unused_prod.append(p)
1776 return unused_prod
1778 # -----------------------------------------------------------------------------
1779 # unused_precedence()
1780 #
1781 # Returns a list of tuples (term,precedence) corresponding to precedence
1782 # rules that were never used by the grammar. term is the name of the terminal
1783 # on which precedence was applied and precedence is a string such as 'left' or
1784 # 'right' corresponding to the type of precedence.
1785 # -----------------------------------------------------------------------------
1787 def unused_precedence(self):
1788 unused = []
1789 for termname in self.Precedence:
1790 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1791 unused.append((termname, self.Precedence[termname][0]))
1793 return unused
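# For example (hypothetical grammar, not part of this module): given
#
#     precedence = (('left', 'PLUS'), ('right', 'UMINUS'))
#
# if UMINUS is neither a declared token nor referenced via %prec in any rule,
# unused_precedence() would return [('UMINUS', 'right')].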
1795 # -------------------------------------------------------------------------
1796 # _first()
1797 #
1798 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1799 #
1800 # During execution of compute_first(), the result may be incomplete.
1801 # Afterward (e.g., when called from compute_follow()), it will be complete.
1802 # -------------------------------------------------------------------------
1803 def _first(self, beta):
1805 # We are computing First(x1,x2,x3,...,xn)
1806 result = []
1807 for x in beta:
1808 x_produces_empty = False
1810 # Add all the non-<empty> symbols of First[x] to the result.
1811 for f in self.First[x]:
1812 if f == '<empty>':
1813 x_produces_empty = True
1814 else:
1815 if f not in result:
1816 result.append(f)
1818 if x_produces_empty:
1819 # We have to consider the next x in beta,
1820 # i.e. stay in the loop.
1821 pass
1822 else:
1823 # We don't have to consider any further symbols in beta.
1824 break
1825 else:
1826 # There was no 'break' from the loop,
1827 # so x_produces_empty was true for all x in beta,
1828 # so beta produces empty as well.
1829 result.append('<empty>')
1831 return result
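# A small worked example (illustrative only): for the grammar
#
#     E : T PLUS E
#     E : T
#     T : NUMBER
#
# FIRST(E) = FIRST(T) = {NUMBER}, so _first(('T', 'PLUS', 'E')) would return
# ['NUMBER']; the loop stops at T because T cannot derive <empty>.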
1833 # -------------------------------------------------------------------------
1834 # compute_first()
1835 #
1836 # Compute the value of FIRST1(X) for all symbols
1837 # -------------------------------------------------------------------------
1838 def compute_first(self):
1839 if self.First:
1840 return self.First
1842 # Terminals:
1843 for t in self.Terminals:
1844 self.First[t] = [t]
1846 self.First['$end'] = ['$end']
1848 # Nonterminals:
1850 # Initialize to the empty set:
1851 for n in self.Nonterminals:
1852 self.First[n] = []
1854 # Then propagate symbols until no change:
1855 while True:
1856 some_change = False
1857 for n in self.Nonterminals:
1858 for p in self.Prodnames[n]:
1859 for f in self._first(p.prod):
1860 if f not in self.First[n]:
1861 self.First[n].append(f)
1862 some_change = True
1863 if not some_change:
1864 break
1866 return self.First
1868 # ---------------------------------------------------------------------
1869 # compute_follow()
1870 #
1871 # Computes all of the follow sets for every non-terminal symbol. The
1872 # follow set is the set of all symbols that might follow a given
1873 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1874 # ---------------------------------------------------------------------
1875 def compute_follow(self, start=None):
1876 # If already computed, return the result
1877 if self.Follow:
1878 return self.Follow
1880 # If first sets not computed yet, do that first.
1881 if not self.First:
1882 self.compute_first()
1884 # Add '$end' to the follow list of the start symbol
1885 for k in self.Nonterminals:
1886 self.Follow[k] = []
1888 if not start:
1889 start = self.Productions[1].name
1891 self.Follow[start] = ['$end']
1893 while True:
1894 didadd = False
1895 for p in self.Productions[1:]:
1896 # Here is the production set
1897 for i, B in enumerate(p.prod):
1898 if B in self.Nonterminals:
1899 # Okay. We got a non-terminal in a production
1900 fst = self._first(p.prod[i+1:])
1901 hasempty = False
1902 for f in fst:
1903 if f != '<empty>' and f not in self.Follow[B]:
1904 self.Follow[B].append(f)
1905 didadd = True
1906 if f == '<empty>':
1907 hasempty = True
1908 if hasempty or i == (len(p.prod)-1):
1909 # Add elements of follow(a) to follow(b)
1910 for f in self.Follow[p.name]:
1911 if f not in self.Follow[B]:
1912 self.Follow[B].append(f)
1913 didadd = True
1914 if not didadd:
1915 break
1916 return self.Follow
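# Continuing the small example above (illustrative only), with start symbol E:
#
#     FOLLOW(E) = {'$end'}
#     FOLLOW(T) = {'PLUS', '$end'}
#
# T picks up PLUS from "E : T PLUS E" and inherits FOLLOW(E) from "E : T".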
1919 # -----------------------------------------------------------------------------
1920 # build_lritems()
1921 #
1922 # This function walks the list of productions and builds a complete set of the
1923 # LR items. The LR items are stored in two ways: First, they are collected
1924 # into a list attached to each production (p.lr_items). Second, a linked list
1925 # of LR items is built for each production via the lr_next attribute. For example:
1926 #
1927 # E -> E PLUS E
1928 #
1929 # Creates the list
1930 #
1931 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1932 # -----------------------------------------------------------------------------
1934 def build_lritems(self):
1935 for p in self.Productions:
1936 lastlri = p
1937 i = 0
1938 lr_items = []
1939 while True:
1940 if i > len(p):
1941 lri = None
1942 else:
1943 lri = LRItem(p, i)
1944 # Precompute the list of productions immediately following
1945 try:
1946 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1947 except (IndexError, KeyError):
1948 lri.lr_after = []
1949 try:
1950 lri.lr_before = lri.prod[i-1]
1951 except IndexError:
1952 lri.lr_before = None
1954 lastlri.lr_next = lri
1955 if not lri:
1956 break
1957 lr_items.append(lri)
1958 lastlri = lri
1959 i += 1
1960 p.lr_items = lr_items
1962# -----------------------------------------------------------------------------
1963# == Class LRTable ==
1964#
1965 # This class represents a basic table of LR parsing information.
1966# Methods for generating the tables are not defined here. They are defined
1967# in the derived class LRGeneratedTable.
1968# -----------------------------------------------------------------------------
1970class VersionError(YaccError):
1971 pass
1973class LRTable(object):
1974 def __init__(self):
1975 self.lr_action = None
1976 self.lr_goto = None
1977 self.lr_productions = None
1978 self.lr_method = None
1980 def read_table(self, module):
1981 if isinstance(module, types.ModuleType):
1982 parsetab = module
1983 else:
1984 exec('import %s' % module)
1985 parsetab = sys.modules[module]
1987 if parsetab._tabversion != __tabversion__:
1988 raise VersionError('yacc table file version is out of date')
1990 self.lr_action = parsetab._lr_action
1991 self.lr_goto = parsetab._lr_goto
1993 self.lr_productions = []
1994 for p in parsetab._lr_productions:
1995 self.lr_productions.append(MiniProduction(*p))
1997 self.lr_method = parsetab._lr_method
1998 return parsetab._lr_signature
2000 def read_pickle(self, filename):
2001 try:
2002 import cPickle as pickle
2003 except ImportError:
2004 import pickle
2006 if not os.path.exists(filename):
2007 raise ImportError
2009 in_f = open(filename, 'rb')
2011 tabversion = pickle.load(in_f)
2012 if tabversion != __tabversion__:
2013 raise VersionError('yacc table file version is out of date')
2014 self.lr_method = pickle.load(in_f)
2015 signature = pickle.load(in_f)
2016 self.lr_action = pickle.load(in_f)
2017 self.lr_goto = pickle.load(in_f)
2018 productions = pickle.load(in_f)
2020 self.lr_productions = []
2021 for p in productions:
2022 self.lr_productions.append(MiniProduction(*p))
2024 in_f.close()
2025 return signature
2027 # Bind all production function names to callable objects in pdict
2028 def bind_callables(self, pdict):
2029 for p in self.lr_productions:
2030 p.bind(pdict)
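# A sketch of typical use (assumes previously generated tables named
# 'parsetab', the default table module):
#
#     lr = LRTable()
#     signature = lr.read_table('parsetab')    # or lr.read_pickle('parser.p')
#     lr.bind_callables(pdict)                 # pdict maps names to p_* functions
#
# Both readers raise VersionError when the stored _tabversion does not match
# this version of PLY.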
2033# -----------------------------------------------------------------------------
2034# === LR Generator ===
2035#
2036 # The following classes and functions are used to generate LR parsing tables for
2037 # a grammar.
2038# -----------------------------------------------------------------------------
2040# -----------------------------------------------------------------------------
2041# digraph()
2042# traverse()
2043#
2044 # The following two functions are used to compute set-valued functions
2045# of the form:
2046#
2047# F(x) = F'(x) U U{F(y) | x R y}
2048#
2049# This is used to compute the values of Read() sets as well as FOLLOW sets
2050# in LALR(1) generation.
2051#
2052# Inputs: X - An input set
2053# R - A relation
2054# FP - Set-valued function
2055# ------------------------------------------------------------------------------
2057def digraph(X, R, FP):
2058 N = {}
2059 for x in X:
2060 N[x] = 0
2061 stack = []
2062 F = {}
2063 for x in X:
2064 if N[x] == 0:
2065 traverse(x, N, stack, F, X, R, FP)
2066 return F
2068def traverse(x, N, stack, F, X, R, FP):
2069 stack.append(x)
2070 d = len(stack)
2071 N[x] = d
2072 F[x] = FP(x) # F(x) <- F'(x)
2074 rel = R(x) # Get y's related to x
2075 for y in rel:
2076 if N[y] == 0:
2077 traverse(y, N, stack, F, X, R, FP)
2078 N[x] = min(N[x], N[y])
2079 for a in F.get(y, []):
2080 if a not in F[x]:
2081 F[x].append(a)
2082 if N[x] == d:
2083 N[stack[-1]] = MAXINT
2084 F[stack[-1]] = F[x]
2085 element = stack.pop()
2086 while element != x:
2087 N[stack[-1]] = MAXINT
2088 F[stack[-1]] = F[x]
2089 element = stack.pop()
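# A tiny worked example (illustrative only, not part of PLY): computing
# F(x) = F'(x) U U{ F(y) | x R y } over X = ['a', 'b'] with a R b:
#
#     X  = ['a', 'b']
#     R  = lambda x: ['b'] if x == 'a' else []
#     FP = lambda x: [x.upper()]
#     digraph(X, R, FP)     # -> {'a': ['A', 'B'], 'b': ['B']}
#
# F('a') absorbs F('b') through the relation, exactly as Read() and FOLLOW
# sets absorb the sets of related transitions.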
2091class LALRError(YaccError):
2092 pass
2094# -----------------------------------------------------------------------------
2095# == LRGeneratedTable ==
2096#
2097# This class implements the LR table generation algorithm. There are no
2098 # public methods except for write_table() and pickle_table()
2099# -----------------------------------------------------------------------------
2101class LRGeneratedTable(LRTable):
2102 def __init__(self, grammar, method='LALR', log=None):
2103 if method not in ['SLR', 'LALR']:
2104 raise LALRError('Unsupported method %s' % method)
2106 self.grammar = grammar
2107 self.lr_method = method
2109 # Set up the logger
2110 if not log:
2111 log = NullLogger()
2112 self.log = log
2114 # Internal attributes
2115 self.lr_action = {} # Action table
2116 self.lr_goto = {} # Goto table
2117 self.lr_productions = grammar.Productions # Copy of grammar Production array
2118 self.lr_goto_cache = {} # Cache of computed gotos
2119 self.lr0_cidhash = {} # Cache of closures
2121 self._add_count = 0 # Internal counter used to detect cycles
2123 # Diagnostic information filled in by the table generator
2124 self.sr_conflict = 0
2125 self.rr_conflict = 0
2126 self.conflicts = [] # List of conflicts
2128 self.sr_conflicts = []
2129 self.rr_conflicts = []
2131 # Build the tables
2132 self.grammar.build_lritems()
2133 self.grammar.compute_first()
2134 self.grammar.compute_follow()
2135 self.lr_parse_table()
2137 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2139 def lr0_closure(self, I):
2140 self._add_count += 1
2142 # Add everything in I to J
2143 J = I[:]
2144 didadd = True
2145 while didadd:
2146 didadd = False
2147 for j in J:
2148 for x in j.lr_after:
2149 if getattr(x, 'lr0_added', 0) == self._add_count:
2150 continue
2151 # Add B --> .G to J
2152 J.append(x.lr_next)
2153 x.lr0_added = self._add_count
2154 didadd = True
2156 return J
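# For example (illustrative only): with productions
#
#     S' -> E        E -> E PLUS E | NUMBER
#
# the closure of [S' -> . E] also contains E -> . E PLUS E and E -> . NUMBER,
# because the dot sits immediately before the nonterminal E.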
2158 # Compute the LR(0) goto function goto(I,X) where I is a set
2159 # of LR(0) items and X is a grammar symbol. This function is written
2160 # in a way that guarantees uniqueness of the generated goto sets
2161 # (i.e. the same goto set will never be returned as two different Python
2162 # objects). With uniqueness, we can later do fast set comparisons using
2163 # id(obj) instead of element-wise comparison.
2165 def lr0_goto(self, I, x):
2166 # First we look for a previously cached entry
2167 g = self.lr_goto_cache.get((id(I), x))
2168 if g:
2169 return g
2171 # Now we generate the goto set in a way that guarantees uniqueness
2172 # of the result
2174 s = self.lr_goto_cache.get(x)
2175 if not s:
2176 s = {}
2177 self.lr_goto_cache[x] = s
2179 gs = []
2180 for p in I:
2181 n = p.lr_next
2182 if n and n.lr_before == x:
2183 s1 = s.get(id(n))
2184 if not s1:
2185 s1 = {}
2186 s[id(n)] = s1
2187 gs.append(n)
2188 s = s1
2189 g = s.get('$end')
2190 if not g:
2191 if gs:
2192 g = self.lr0_closure(gs)
2193 s['$end'] = g
2194 else:
2195 s['$end'] = gs
2196 self.lr_goto_cache[(id(I), x)] = g
2197 return g
2199 # Compute the LR(0) sets of item function
2200 def lr0_items(self):
2201 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2202 i = 0
2203 for I in C:
2204 self.lr0_cidhash[id(I)] = i
2205 i += 1
2207 # Loop over the items in C and each grammar symbol
2208 i = 0
2209 while i < len(C):
2210 I = C[i]
2211 i += 1
2213 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2214 asyms = {}
2215 for ii in I:
2216 for s in ii.usyms:
2217 asyms[s] = None
2219 for x in asyms:
2220 g = self.lr0_goto(I, x)
2221 if not g or id(g) in self.lr0_cidhash:
2222 continue
2223 self.lr0_cidhash[id(g)] = len(C)
2224 C.append(g)
2226 return C
2228 # -----------------------------------------------------------------------------
2229 # ==== LALR(1) Parsing ====
2230 #
2231 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2232 # relying upon Follow() sets when performing reductions, a more selective
2233 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2234 # Thus, we mainly just have to focus on calculating the lookahead sets.
2235 #
2236 # The method used here is due to DeRemer and Pennello (1982).
2237 #
2238 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2239 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2240 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2241 #
2242 # Further details can also be found in:
2243 #
2244 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2245 # McGraw-Hill Book Company, (1985).
2246 #
2247 # -----------------------------------------------------------------------------
2249 # -----------------------------------------------------------------------------
2250 # compute_nullable_nonterminals()
2251 #
2252 # Creates a set containing all of the non-terminals that might produce
2253 # an empty production.
2254 # -----------------------------------------------------------------------------
2256 def compute_nullable_nonterminals(self):
2257 nullable = set()
2258 num_nullable = 0
2259 while True:
2260 for p in self.grammar.Productions[1:]:
2261 if p.len == 0:
2262 nullable.add(p.name)
2263 continue
2264 for t in p.prod:
2265 if t not in nullable:
2266 break
2267 else:
2268 nullable.add(p.name)
2269 if len(nullable) == num_nullable:
2270 break
2271 num_nullable = len(nullable)
2272 return nullable
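# For example (illustrative only): given
#
#     opt_sign : PLUS
#     opt_sign : MINUS
#     opt_sign : <empty>
#     number   : opt_sign NUMBER
#
# the returned set is {'opt_sign'}; number is not nullable because the
# terminal NUMBER cannot derive empty.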
2274 # -----------------------------------------------------------------------------
2275 # find_nonterminal_trans(C)
2276 #
2277 # Given a set of LR(0) items, this function finds all of the non-terminal
2278 # transitions. These are transitions in which a dot appears immediately before
2279 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2280 # is the state number and N is the nonterminal symbol.
2281 #
2282 # The input C is the set of LR(0) items.
2283 # -----------------------------------------------------------------------------
2285 def find_nonterminal_transitions(self, C):
2286 trans = []
2287 for stateno, state in enumerate(C):
2288 for p in state:
2289 if p.lr_index < p.len - 1:
2290 t = (stateno, p.prod[p.lr_index+1])
2291 if t[1] in self.grammar.Nonterminals:
2292 if t not in trans:
2293 trans.append(t)
2294 return trans
2296 # -----------------------------------------------------------------------------
2297 # dr_relation()
2298 #
2299 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2300 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2301 #
2302 # Returns a list of terminals.
2303 # -----------------------------------------------------------------------------
2305 def dr_relation(self, C, trans, nullable):
2306 state, N = trans
2307 terms = []
2309 g = self.lr0_goto(C[state], N)
2310 for p in g:
2311 if p.lr_index < p.len - 1:
2312 a = p.prod[p.lr_index+1]
2313 if a in self.grammar.Terminals:
2314 if a not in terms:
2315 terms.append(a)
2317 # This extra bit is to handle the start state
2318 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2319 terms.append('$end')
2321 return terms
2323 # -----------------------------------------------------------------------------
2324 # reads_relation()
2325 #
2326 # Computes the READS() relation (p,A) READS (t,C).
2327 # -----------------------------------------------------------------------------
2329 def reads_relation(self, C, trans, empty):
2330 # Look for empty transitions
2331 rel = []
2332 state, N = trans
2334 g = self.lr0_goto(C[state], N)
2335 j = self.lr0_cidhash.get(id(g), -1)
2336 for p in g:
2337 if p.lr_index < p.len - 1:
2338 a = p.prod[p.lr_index + 1]
2339 if a in empty:
2340 rel.append((j, a))
2342 return rel
2344 # -----------------------------------------------------------------------------
2345 # compute_lookback_includes()
2346 #
2347 # Determines the lookback and includes relations
2348 #
2349 # LOOKBACK:
2350 #
2351 # This relation is determined by running the LR(0) state machine forward.
2352 # For example, starting with a production "N : . A B C", we run it forward
2353 # to obtain "N : A B C ." We then build a relationship between this final
2354 # state and the starting state. These relationships are stored in a dictionary
2355 # lookdict.
2356 #
2357 # INCLUDES:
2358 #
2359 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2360 #
2361 # This relation is used to determine non-terminal transitions that occur
2362 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2363 # if the following holds:
2364 #
2365 # B -> LAT, where T -> epsilon and p' -L-> p
2366 #
2367 # L is essentially a prefix (which may be empty), T is a suffix that must be
2368 # able to derive an empty string. State p' must lead to state p with the string L.
2369 #
2370 # -----------------------------------------------------------------------------
2372 def compute_lookback_includes(self, C, trans, nullable):
2373 lookdict = {} # Dictionary of lookback relations
2374 includedict = {} # Dictionary of include relations
2376 # Make a dictionary of non-terminal transitions
2377 dtrans = {}
2378 for t in trans:
2379 dtrans[t] = 1
2381 # Loop over all transitions and compute lookbacks and includes
2382 for state, N in trans:
2383 lookb = []
2384 includes = []
2385 for p in C[state]:
2386 if p.name != N:
2387 continue
2389 # Okay, we have a name match. We now follow the production all the way
2390 # through the state machine until we get the . on the right hand side
2392 lr_index = p.lr_index
2393 j = state
2394 while lr_index < p.len - 1:
2395 lr_index = lr_index + 1
2396 t = p.prod[lr_index]
2398 # Check to see if this symbol and state are a non-terminal transition
2399 if (j, t) in dtrans:
2400 # Yes. Okay, there is some chance that this is an includes relation;
2401 # the only way to know for certain is to check whether the rest of the
2402 # production derives empty
2404 li = lr_index + 1
2405 while li < p.len:
2406 if p.prod[li] in self.grammar.Terminals:
2407 break # No, forget it
2408 if p.prod[li] not in nullable:
2409 break
2410 li = li + 1
2411 else:
2412 # Appears to be a relation between (j,t) and (state,N)
2413 includes.append((j, t))
2415 g = self.lr0_goto(C[j], t) # Go to next set
2416 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2418 # When we get here, j is the final state; now we have to locate the production
2419 for r in C[j]:
2420 if r.name != p.name:
2421 continue
2422 if r.len != p.len:
2423 continue
2424 i = 0
2425 # This loop is comparing a production ". A B C" with "A B C ."
2426 while i < r.lr_index:
2427 if r.prod[i] != p.prod[i+1]:
2428 break
2429 i = i + 1
2430 else:
2431 lookb.append((j, r))
2432 for i in includes:
2433 if i not in includedict:
2434 includedict[i] = []
2435 includedict[i].append((state, N))
2436 lookdict[(state, N)] = lookb
2438 return lookdict, includedict
2440 # -----------------------------------------------------------------------------
2441 # compute_read_sets()
2442 #
2443 # Given a set of LR(0) items, this function computes the read sets.
2444 #
2445 # Inputs: C = Set of LR(0) items
2446 # ntrans = Set of nonterminal transitions
2447 # nullable = Set of empty transitions
2448 #
2449 # Returns a set containing the read sets
2450 # -----------------------------------------------------------------------------
2452 def compute_read_sets(self, C, ntrans, nullable):
2453 FP = lambda x: self.dr_relation(C, x, nullable)
2454 R = lambda x: self.reads_relation(C, x, nullable)
2455 F = digraph(ntrans, R, FP)
2456 return F
2458 # -----------------------------------------------------------------------------
2459 # compute_follow_sets()
2460 #
2461 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2462 # and an include set, this function computes the follow sets
2463 #
2464 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2465 #
2466 # Inputs:
2467 # ntrans = Set of nonterminal transitions
2468 # readsets = Readset (previously computed)
2469 # inclsets = Include sets (previously computed)
2470 #
2471 # Returns a set containing the follow sets
2472 # -----------------------------------------------------------------------------
2474 def compute_follow_sets(self, ntrans, readsets, inclsets):
2475 FP = lambda x: readsets[x]
2476 R = lambda x: inclsets.get(x, [])
2477 F = digraph(ntrans, R, FP)
2478 return F
2480 # -----------------------------------------------------------------------------
2481 # add_lookaheads()
2482 #
2483 # Attaches the lookahead symbols to grammar rules.
2484 #
2485 # Inputs: lookbacks - Set of lookback relations
2486 # followset - Computed follow set
2487 #
2488 # This function directly attaches the lookaheads to productions contained
2489 # in the lookbacks set
2490 # -----------------------------------------------------------------------------
2492 def add_lookaheads(self, lookbacks, followset):
2493 for trans, lb in lookbacks.items():
2494 # Loop over productions in lookback
2495 for state, p in lb:
2496 if state not in p.lookaheads:
2497 p.lookaheads[state] = []
2498 f = followset.get(trans, [])
2499 for a in f:
2500 if a not in p.lookaheads[state]:
2501 p.lookaheads[state].append(a)
2503 # -----------------------------------------------------------------------------
2504 # add_lalr_lookaheads()
2505 #
2506 # This function does all of the work of adding lookahead information for use
2507 # with LALR parsing
2508 # -----------------------------------------------------------------------------
2510 def add_lalr_lookaheads(self, C):
2511 # Determine all of the nullable nonterminals
2512 nullable = self.compute_nullable_nonterminals()
2514 # Find all non-terminal transitions
2515 trans = self.find_nonterminal_transitions(C)
2517 # Compute read sets
2518 readsets = self.compute_read_sets(C, trans, nullable)
2520 # Compute lookback/includes relations
2521 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2523 # Compute LALR FOLLOW sets
2524 followsets = self.compute_follow_sets(trans, readsets, included)
2526 # Add all of the lookaheads
2527 self.add_lookaheads(lookd, followsets)
2529 # -----------------------------------------------------------------------------
2530 # lr_parse_table()
2531 #
2532 # This function constructs the parse tables for SLR or LALR
2533 # -----------------------------------------------------------------------------
2534 def lr_parse_table(self):
2535 Productions = self.grammar.Productions
2536 Precedence = self.grammar.Precedence
2537 goto = self.lr_goto # Goto array
2538 action = self.lr_action # Action array
2539 log = self.log # Logger for output
2541 actionp = {} # Action production array (temporary)
2543 log.info('Parsing method: %s', self.lr_method)
2545 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2546 # This determines the number of states
2548 C = self.lr0_items()
2550 if self.lr_method == 'LALR':
2551 self.add_lalr_lookaheads(C)
2553 # Build the parser table, state by state
2554 st = 0
2555 for I in C:
2556 # Loop over each production in I
2557 actlist = [] # List of actions
2558 st_action = {}
2559 st_actionp = {}
2560 st_goto = {}
2561 log.info('')
2562 log.info('state %d', st)
2563 log.info('')
2564 for p in I:
2565 log.info(' (%d) %s', p.number, p)
2566 log.info('')
2568 for p in I:
2569 if p.len == p.lr_index + 1:
2570 if p.name == "S'":
2571 # Start symbol. Accept!
2572 st_action['$end'] = 0
2573 st_actionp['$end'] = p
2574 else:
2575 # We are at the end of a production. Reduce!
2576 if self.lr_method == 'LALR':
2577 laheads = p.lookaheads[st]
2578 else:
2579 laheads = self.grammar.Follow[p.name]
2580 for a in laheads:
2581 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2582 r = st_action.get(a)
2583 if r is not None:
2584 # Whoa. Have a shift/reduce or reduce/reduce conflict
2585 if r > 0:
2586 # Need to decide on shift or reduce here
2587 # By default we favor shifting. Need to add
2588 # some precedence rules here.
2590 # Shift precedence comes from the token
2591 sprec, slevel = Precedence.get(a, ('right', 0))
2594 # Reduce precedence comes from the rule being reduced (p)
2594 rprec, rlevel = Productions[p.number].prec
2596 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2597 # We really need to reduce here.
2598 st_action[a] = -p.number
2599 st_actionp[a] = p
2600 if not slevel and not rlevel:
2601 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2602 self.sr_conflicts.append((st, a, 'reduce'))
2603 Productions[p.number].reduced += 1
2604 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2605 st_action[a] = None
2606 else:
2607 # Hmmm. Guess we'll keep the shift
2608 if not rlevel:
2609 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2610 self.sr_conflicts.append((st, a, 'shift'))
2611 elif r < 0:
2612 # Reduce/reduce conflict. In this case, we favor the rule
2613 # that was defined first in the grammar file
2614 oldp = Productions[-r]
2615 pp = Productions[p.number]
2616 if oldp.line > pp.line:
2617 st_action[a] = -p.number
2618 st_actionp[a] = p
2619 chosenp, rejectp = pp, oldp
2620 Productions[p.number].reduced += 1
2621 Productions[oldp.number].reduced -= 1
2622 else:
2623 chosenp, rejectp = oldp, pp
2624 self.rr_conflicts.append((st, chosenp, rejectp))
2625 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2626 a, st_actionp[a].number, st_actionp[a])
2627 else:
2628 raise LALRError('Unknown conflict in state %d' % st)
2629 else:
2630 st_action[a] = -p.number
2631 st_actionp[a] = p
2632 Productions[p.number].reduced += 1
2633 else:
2634 i = p.lr_index
2635 a = p.prod[i+1] # Get symbol right after the "."
2636 if a in self.grammar.Terminals:
2637 g = self.lr0_goto(I, a)
2638 j = self.lr0_cidhash.get(id(g), -1)
2639 if j >= 0:
2640 # We are in a shift state
2641 actlist.append((a, p, 'shift and go to state %d' % j))
2642 r = st_action.get(a)
2643 if r is not None:
2644 # Whoa. Have a shift/reduce or shift/shift conflict
2645 if r > 0:
2646 if r != j:
2647 raise LALRError('Shift/shift conflict in state %d' % st)
2648 elif r < 0:
2649 # Do a precedence check.
2650 # - if precedence of reduce rule is higher, we reduce.
2651 # - if precedence of reduce is same and left assoc, we reduce.
2652 # - otherwise we shift
2654 # Shift precedence comes from the token
2655 sprec, slevel = Precedence.get(a, ('right', 0))
2657 # Reduce precedence comes from the rule that could have been reduced
2658 rprec, rlevel = Productions[st_actionp[a].number].prec
2660 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2661 # We decide to shift here... highest precedence to shift
2662 Productions[st_actionp[a].number].reduced -= 1
2663 st_action[a] = j
2664 st_actionp[a] = p
2665 if not rlevel:
2666 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2667 self.sr_conflicts.append((st, a, 'shift'))
2668 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2669 st_action[a] = None
2670 else:
2671 # Hmmm. Guess we'll keep the reduce
2672 if not slevel and not rlevel:
2673 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2674 self.sr_conflicts.append((st, a, 'reduce'))
2676 else:
2677 raise LALRError('Unknown conflict in state %d' % st)
2678 else:
2679 st_action[a] = j
2680 st_actionp[a] = p
2682 # Print the actions associated with each terminal
2683 _actprint = {}
2684 for a, p, m in actlist:
2685 if a in st_action:
2686 if p is st_actionp[a]:
2687 log.info(' %-15s %s', a, m)
2688 _actprint[(a, m)] = 1
2689 log.info('')
2690 # Print the actions that were not used. (debugging)
2691 not_used = 0
2692 for a, p, m in actlist:
2693 if a in st_action:
2694 if p is not st_actionp[a]:
2695 if (a, m) not in _actprint:
2696 log.debug(' ! %-15s [ %s ]', a, m)
2697 not_used = 1
2698 _actprint[(a, m)] = 1
2699 if not_used:
2700 log.debug('')
2702 # Construct the goto table for this state
2704 nkeys = {}
2705 for ii in I:
2706 for s in ii.usyms:
2707 if s in self.grammar.Nonterminals:
2708 nkeys[s] = None
2709 for n in nkeys:
2710 g = self.lr0_goto(I, n)
2711 j = self.lr0_cidhash.get(id(g), -1)
2712 if j >= 0:
2713 st_goto[n] = j
2714 log.info(' %-30s shift and go to state %d', n, j)
2716 action[st] = st_action
2717 actionp[st] = st_actionp
2718 goto[st] = st_goto
2719 st += 1
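# A short illustration of the precedence logic above (not part of PLY): with
#
#     precedence = (('left', 'PLUS'),)
#
# the shift/reduce conflict between shifting PLUS and reducing
# "expr : expr PLUS expr" is resolved as reduce (equal levels, left
# associativity); declaring ('right', 'PLUS') instead would resolve it as
# shift, and ('nonassoc', 'PLUS') would turn the conflicting action into
# an error.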
2721 # -----------------------------------------------------------------------------
2722 # write()
2723 #
2724 # This function writes the LR parsing tables to a file
2725 # -----------------------------------------------------------------------------
2727 def write_table(self, tabmodule, outputdir='', signature=''):
2728 if isinstance(tabmodule, types.ModuleType):
2729 raise IOError("Won't overwrite existing tabmodule")
2731 basemodulename = tabmodule.split('.')[-1]
2732 filename = os.path.join(outputdir, basemodulename) + '.py'
2733 try:
2734 f = open(filename, 'w')
2736 f.write('''
2737# %s
2738# This file is automatically generated. Do not edit.
2739# pylint: disable=W,C,R
2740_tabversion = %r
2742_lr_method = %r
2744_lr_signature = %r
2745 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2747 # Change smaller to 0 to go back to original tables
2748 smaller = 1
2750 # Factor out names to try and make smaller
2751 if smaller:
2752 items = {}
2754 for s, nd in self.lr_action.items():
2755 for name, v in nd.items():
2756 i = items.get(name)
2757 if not i:
2758 i = ([], [])
2759 items[name] = i
2760 i[0].append(s)
2761 i[1].append(v)
2763 f.write('\n_lr_action_items = {')
2764 for k, v in items.items():
2765 f.write('%r:([' % k)
2766 for i in v[0]:
2767 f.write('%r,' % i)
2768 f.write('],[')
2769 for i in v[1]:
2770 f.write('%r,' % i)
2772 f.write(']),')
2773 f.write('}\n')
2775 f.write('''
2776_lr_action = {}
2777for _k, _v in _lr_action_items.items():
2778 for _x,_y in zip(_v[0],_v[1]):
2779 if not _x in _lr_action: _lr_action[_x] = {}
2780 _lr_action[_x][_k] = _y
2781del _lr_action_items
2782''')
2784 else:
2785 f.write('\n_lr_action = { ')
2786 for k, v in self.lr_action.items():
2787 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2788 f.write('}\n')
2790 if smaller:
2791 # Factor out names to try and make smaller
2792 items = {}
2794 for s, nd in self.lr_goto.items():
2795 for name, v in nd.items():
2796 i = items.get(name)
2797 if not i:
2798 i = ([], [])
2799 items[name] = i
2800 i[0].append(s)
2801 i[1].append(v)
2803 f.write('\n_lr_goto_items = {')
2804 for k, v in items.items():
2805 f.write('%r:([' % k)
2806 for i in v[0]:
2807 f.write('%r,' % i)
2808 f.write('],[')
2809 for i in v[1]:
2810 f.write('%r,' % i)
2812 f.write(']),')
2813 f.write('}\n')
2815 f.write('''
2816_lr_goto = {}
2817for _k, _v in _lr_goto_items.items():
2818 for _x, _y in zip(_v[0], _v[1]):
2819 if not _x in _lr_goto: _lr_goto[_x] = {}
2820 _lr_goto[_x][_k] = _y
2821del _lr_goto_items
2822''')
2823 else:
2824 f.write('\n_lr_goto = { ')
2825 for k, v in self.lr_goto.items():
2826 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2827 f.write('}\n')
2829 # Write production table
2830 f.write('_lr_productions = [\n')
2831 for p in self.lr_productions:
2832 if p.func:
2833 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2834 p.func, os.path.basename(p.file), p.line))
2835 else:
2836 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2837 f.write(']\n')
2838 f.close()
2840 except IOError:
2841 raise
2844 # -----------------------------------------------------------------------------
2845 # pickle_table()
2846 #
2847 # This function pickles the LR parsing tables and writes them to the named file
2848 # -----------------------------------------------------------------------------
2850 def pickle_table(self, filename, signature=''):
2851 try:
2852 import cPickle as pickle
2853 except ImportError:
2854 import pickle
2855 with open(filename, 'wb') as outf:
2856 pickle.dump(__tabversion__, outf, pickle_protocol)
2857 pickle.dump(self.lr_method, outf, pickle_protocol)
2858 pickle.dump(signature, outf, pickle_protocol)
2859 pickle.dump(self.lr_action, outf, pickle_protocol)
2860 pickle.dump(self.lr_goto, outf, pickle_protocol)
2862 outp = []
2863 for p in self.lr_productions:
2864 if p.func:
2865 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2866 else:
2867 outp.append((str(p), p.name, p.len, None, None, None))
2868 pickle.dump(outp, outf, pickle_protocol)
2870# -----------------------------------------------------------------------------
2871# === INTROSPECTION ===
2872#
2873# The following functions and classes are used to implement the PLY
2874# introspection features followed by the yacc() function itself.
2875# -----------------------------------------------------------------------------
2877# -----------------------------------------------------------------------------
2878# get_caller_module_dict()
2879#
2880# This function returns a dictionary containing all of the symbols defined within
2881# a caller further down the call stack. This is used to get the environment
2882# associated with the yacc() call if none was provided.
2883# -----------------------------------------------------------------------------
2885def get_caller_module_dict(levels):
2886 f = sys._getframe(levels)
2887 ldict = f.f_globals.copy()
2888 if f.f_globals != f.f_locals:
2889 ldict.update(f.f_locals)
2890 return ldict
2892# -----------------------------------------------------------------------------
2893# parse_grammar()
2894#
2895# This takes a raw grammar rule string and parses it into production data
2896# -----------------------------------------------------------------------------
2897def parse_grammar(doc, file, line):
2898 grammar = []
2899 # Split the doc string into lines
2900 pstrings = doc.splitlines()
2901 lastp = None
2902 dline = line
2903 for ps in pstrings:
2904 dline += 1
2905 p = ps.split()
2906 if not p:
2907 continue
2908 try:
2909 if p[0] == '|':
2910 # This is a continuation of a previous rule
2911 if not lastp:
2912 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2913 prodname = lastp
2914 syms = p[1:]
2915 else:
2916 prodname = p[0]
2917 lastp = prodname
2918 syms = p[2:]
2919 assign = p[1]
2920 if assign != ':' and assign != '::=':
2921 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2923 grammar.append((file, dline, prodname, syms))
2924 except SyntaxError:
2925 raise
2926 except Exception:
2927 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2929 return grammar
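# For example (illustrative only), for a rule function defined on line 10 of
# calc.py whose docstring reads
#
#     expression : expression PLUS term
#                | term
#
# parse_grammar(doc, 'calc.py', 10) would return
#
#     [('calc.py', 11, 'expression', ['expression', 'PLUS', 'term']),
#      ('calc.py', 12, 'expression', ['term'])]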
2931# -----------------------------------------------------------------------------
2932# ParserReflect()
2933#
2934# This class represents information extracted for building a parser including
2935# start symbol, error function, tokens, precedence list, action functions,
2936# etc.
2937# -----------------------------------------------------------------------------
2938class ParserReflect(object):
2939 def __init__(self, pdict, log=None):
2940 self.pdict = pdict
2941 self.start = None
2942 self.error_func = None
2943 self.tokens = None
2944 self.modules = set()
2945 self.grammar = []
2946 self.error = False
2948 if log is None:
2949 self.log = PlyLogger(sys.stderr)
2950 else:
2951 self.log = log
2953 # Get all of the basic information
2954 def get_all(self):
2955 self.get_start()
2956 self.get_error_func()
2957 self.get_tokens()
2958 self.get_precedence()
2959 self.get_pfunctions()
2961 # Validate all of the information
2962 def validate_all(self):
2963 self.validate_start()
2964 self.validate_error_func()
2965 self.validate_tokens()
2966 self.validate_precedence()
2967 self.validate_pfunctions()
2968 self.validate_modules()
2969 return self.error
2971 # Compute a signature over the grammar
2972 def signature(self):
2973 parts = []
2974 try:
2975 if self.start:
2976 parts.append(self.start)
2977 if self.prec:
2978 parts.append(''.join([''.join(p) for p in self.prec]))
2979 if self.tokens:
2980 parts.append(' '.join(self.tokens))
2981 for f in self.pfuncs:
2982 if f[3]:
2983 parts.append(f[3])
2984 except (TypeError, ValueError):
2985 pass
2986 return ''.join(parts)
2988 # -----------------------------------------------------------------------------
2989 # validate_modules()
2990 #
2991 # This method checks to see if there are duplicated p_rulename() functions
2992 # in the parser module file. Without this function, it is really easy for
2993 # users to make mistakes by cutting and pasting code fragments (and it's a real
2994 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2995 # we just do a little regular expression pattern matching of def statements
2996 # to try and detect duplicates.
2997 # -----------------------------------------------------------------------------
2999 def validate_modules(self):
3000 # Match def p_funcname(
3001 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
3003 for module in self.modules:
3004 try:
3005 lines, linen = inspect.getsourcelines(module)
3006 except IOError:
3007 continue
3009 counthash = {}
3010 for linen, line in enumerate(lines):
3011 linen += 1
3012 m = fre.match(line)
3013 if m:
3014 name = m.group(1)
3015 prev = counthash.get(name)
3016 if not prev:
3017 counthash[name] = linen
3018 else:
3019 filename = inspect.getsourcefile(module)
3020 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
3021 filename, linen, name, prev)
3023 # Get the start symbol
3024 def get_start(self):
3025 self.start = self.pdict.get('start')
3027 # Validate the start symbol
3028 def validate_start(self):
3029 if self.start is not None:
3030 if not isinstance(self.start, string_types):
3031 self.log.error("'start' must be a string")
3033 # Look for error handler
3034 def get_error_func(self):
3035 self.error_func = self.pdict.get('p_error')
3037 # Validate the error function
3038 def validate_error_func(self):
3039 if self.error_func:
3040 if isinstance(self.error_func, types.FunctionType):
3041 ismethod = 0
3042 elif isinstance(self.error_func, types.MethodType):
3043 ismethod = 1
3044 else:
3045 self.log.error("'p_error' defined, but is not a function or method")
3046 self.error = True
3047 return
3049 eline = self.error_func.__code__.co_firstlineno
3050 efile = self.error_func.__code__.co_filename
3051 module = inspect.getmodule(self.error_func)
3052 self.modules.add(module)
3054 argcount = self.error_func.__code__.co_argcount - ismethod
3055 if argcount != 1:
3056 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3057 self.error = True
3059 # Get the tokens map
3060 def get_tokens(self):
3061 tokens = self.pdict.get('tokens')
3062 if not tokens:
3063 self.log.error('No token list is defined')
3064 self.error = True
3065 return
3067 if not isinstance(tokens, (list, tuple)):
3068 self.log.error('tokens must be a list or tuple')
3069 self.error = True
3070 return
3072 if not tokens:
3073 self.log.error('tokens is empty')
3074 self.error = True
3075 return
3077 self.tokens = sorted(tokens)
3079 # Validate the tokens
3080 def validate_tokens(self):
3081 # Validate the tokens.
3082 if 'error' in self.tokens:
3083 self.log.error("Illegal token name 'error'. Is a reserved word")
3084 self.error = True
3085 return
3087 terminals = set()
3088 for n in self.tokens:
3089 if n in terminals:
3090 self.log.warning('Token %r multiply defined', n)
3091 terminals.add(n)
3093 # Get the precedence map (if any)
3094 def get_precedence(self):
3095 self.prec = self.pdict.get('precedence')
3097 # Validate and parse the precedence map
3098 def validate_precedence(self):
3099 preclist = []
3100 if self.prec:
3101 if not isinstance(self.prec, (list, tuple)):
3102 self.log.error('precedence must be a list or tuple')
3103 self.error = True
3104 return
3105 for level, p in enumerate(self.prec):
3106 if not isinstance(p, (list, tuple)):
3107 self.log.error('Bad precedence table')
3108 self.error = True
3109 return
3111 if len(p) < 2:
3112 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3113 self.error = True
3114 return
3115 assoc = p[0]
3116 if not isinstance(assoc, string_types):
3117 self.log.error('precedence associativity must be a string')
3118 self.error = True
3119 return
3120 for term in p[1:]:
3121 if not isinstance(term, string_types):
3122 self.log.error('precedence items must be strings')
3123 self.error = True
3124 return
3125 preclist.append((term, assoc, level+1))
3126 self.preclist = preclist
3128 # Get all p_functions from the grammar
3129 def get_pfunctions(self):
3130 p_functions = []
3131 for name, item in self.pdict.items():
3132 if not name.startswith('p_') or name == 'p_error':
3133 continue
3134 if isinstance(item, (types.FunctionType, types.MethodType)):
3135 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
3136 module = inspect.getmodule(item)
3137 p_functions.append((line, module, name, item.__doc__))
3139 # Sort all of the actions by line number; make sure to stringify
3140 # modules to make them sortable, since `line` may not uniquely sort all
3141 # p functions
3142 p_functions.sort(key=lambda p_function: (
3143 p_function[0],
3144 str(p_function[1]),
3145 p_function[2],
3146 p_function[3]))
3147 self.pfuncs = p_functions
3149 # Validate all of the p_functions
3150 def validate_pfunctions(self):
3151 grammar = []
3152 # Check for non-empty symbols
3153 if len(self.pfuncs) == 0:
3154 self.log.error('no rules of the form p_rulename are defined')
3155 self.error = True
3156 return
3158 for line, module, name, doc in self.pfuncs:
3159 file = inspect.getsourcefile(module)
3160 func = self.pdict[name]
3161 if isinstance(func, types.MethodType):
3162 reqargs = 2
3163 else:
3164 reqargs = 1
3165 if func.__code__.co_argcount > reqargs:
3166 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3167 self.error = True
3168 elif func.__code__.co_argcount < reqargs:
3169 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3170 self.error = True
3171 elif not func.__doc__:
3172 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3173 file, line, func.__name__)
3174 else:
3175 try:
3176 parsed_g = parse_grammar(doc, file, line)
3177 for g in parsed_g:
3178 grammar.append((name, g))
3179 except SyntaxError as e:
3180 self.log.error(str(e))
3181 self.error = True
3183 # Looks like a valid grammar rule
3184 # Mark the file in which it was defined.
3185 self.modules.add(module)
3187 # Secondary validation step that looks for p_ definitions that are not functions
3188 # or functions that look like they might be grammar rules.
3190 for n, v in self.pdict.items():
3191 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3192 continue
3193 if n.startswith('t_'):
3194 continue
3195 if n.startswith('p_') and n != 'p_error':
3196 self.log.warning('%r not defined as a function', n)
3197 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3198 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3199 if v.__doc__:
3200 try:
3201 doc = v.__doc__.split(' ')
3202 if doc[1] == ':':
3203 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3204 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3205 except IndexError:
3206 pass
3208 self.grammar = grammar
3210# -----------------------------------------------------------------------------
3211# yacc(module)
3212#
3213# Build a parser
3214# -----------------------------------------------------------------------------
3216def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3217 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3218 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3220 if tabmodule is None:
3221 tabmodule = tab_module
3223 # Reference to the parsing method of the last built parser
3224 global parse
3226 # If pickling is enabled, table files are not created
3227 if picklefile:
3228 write_tables = 0
3230 if errorlog is None:
3231 errorlog = PlyLogger(sys.stderr)
3233 # Get the module dictionary used for the parser
3234 if module:
3235 _items = [(k, getattr(module, k)) for k in dir(module)]
3236 pdict = dict(_items)
3237 # If no __file__ or __package__ attributes are available, try to obtain them
3238 # from the __module__ instead
3239 if '__file__' not in pdict:
3240 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3241 if '__package__' not in pdict and '__module__' in pdict:
3242 if hasattr(sys.modules[pdict['__module__']], '__package__'):
3243 pdict['__package__'] = sys.modules[pdict['__module__']].__package__
3244 else:
3245 pdict = get_caller_module_dict(2)
3247 if outputdir is None:
3248 # If no output directory is set, the location of the output files
3249 # is determined according to the following rules:
3250 # - If tabmodule specifies a package, files go into that package directory
3251 # - Otherwise, files go in the same directory as the specifying module
3252 if isinstance(tabmodule, types.ModuleType):
3253 srcfile = tabmodule.__file__
3254 else:
3255 if '.' not in tabmodule:
3256 srcfile = pdict['__file__']
3257 else:
3258 parts = tabmodule.split('.')
3259 pkgname = '.'.join(parts[:-1])
3260 exec('import %s' % pkgname)
3261 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3262 outputdir = os.path.dirname(srcfile)
3264 # Determine if the module is part of a package or not.
3265 # If so, fix the tabmodule setting so that tables load correctly
3266 pkg = pdict.get('__package__')
3267 if pkg and isinstance(tabmodule, str):
3268 if '.' not in tabmodule:
3269 tabmodule = pkg + '.' + tabmodule
3273 # Set start symbol if it's specified directly using an argument
3274 if start is not None:
3275 pdict['start'] = start
3277 # Collect parser information from the dictionary
3278 pinfo = ParserReflect(pdict, log=errorlog)
3279 pinfo.get_all()
3281 if pinfo.error:
3282 raise YaccError('Unable to build parser')
3284 # Check signature against table files (if any)
3285 signature = pinfo.signature()
3287 # Read the tables
3288 try:
3289 lr = LRTable()
3290 if picklefile:
3291 read_signature = lr.read_pickle(picklefile)
3292 else:
3293 read_signature = lr.read_table(tabmodule)
3294 if optimize or (read_signature == signature):
3295 try:
3296 lr.bind_callables(pinfo.pdict)
3297 parser = LRParser(lr, pinfo.error_func)
3298 parse = parser.parse
3299 return parser
3300 except Exception as e:
3301 errorlog.warning('There was a problem loading the table file: %r', e)
3302 except VersionError as e:
3303 errorlog.warning(str(e))
3304 except ImportError:
3305 pass
3307 if debuglog is None:
3308 if debug:
3309 try:
3310 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3311 except IOError as e:
3312 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3313 debuglog = NullLogger()
3314 else:
3315 debuglog = NullLogger()
3317 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3319 errors = False
3321 # Validate the parser information
3322 if pinfo.validate_all():
3323 raise YaccError('Unable to build parser')
3325 if not pinfo.error_func:
3326 errorlog.warning('no p_error() function is defined')
3328 # Create a grammar object
3329 grammar = Grammar(pinfo.tokens)
3331 # Set precedence level for terminals
3332 for term, assoc, level in pinfo.preclist:
3333 try:
3334 grammar.set_precedence(term, assoc, level)
3335 except GrammarError as e:
3336 errorlog.warning('%s', e)
3338 # Add productions to the grammar
3339 for funcname, gram in pinfo.grammar:
3340 file, line, prodname, syms = gram
3341 try:
3342 grammar.add_production(prodname, syms, funcname, file, line)
3343 except GrammarError as e:
3344 errorlog.error('%s', e)
3345 errors = True
3347 # Set the grammar start symbols
3348 try:
3349 if start is None:
3350 grammar.set_start(pinfo.start)
3351 else:
3352 grammar.set_start(start)
3353 except GrammarError as e:
3354 errorlog.error(str(e))
3355 errors = True
3357 if errors:
3358 raise YaccError('Unable to build parser')
3360 # Verify the grammar structure
3361 undefined_symbols = grammar.undefined_symbols()
3362 for sym, prod in undefined_symbols:
3363 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3364 errors = True
3366 unused_terminals = grammar.unused_terminals()
3367 if unused_terminals:
3368 debuglog.info('')
3369 debuglog.info('Unused terminals:')
3370 debuglog.info('')
3371 for term in unused_terminals:
3372 errorlog.warning('Token %r defined, but not used', term)
3373 debuglog.info(' %s', term)
3375 # Print out all productions to the debug log
3376 if debug:
3377 debuglog.info('')
3378 debuglog.info('Grammar')
3379 debuglog.info('')
3380 for n, p in enumerate(grammar.Productions):
3381 debuglog.info('Rule %-5d %s', n, p)
3383 # Find unused non-terminals
3384 unused_rules = grammar.unused_rules()
3385 for prod in unused_rules:
3386 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3388 if len(unused_terminals) == 1:
3389 errorlog.warning('There is 1 unused token')
3390 if len(unused_terminals) > 1:
3391 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3393 if len(unused_rules) == 1:
3394 errorlog.warning('There is 1 unused rule')
3395 if len(unused_rules) > 1:
3396 errorlog.warning('There are %d unused rules', len(unused_rules))
3398 if debug:
3399 debuglog.info('')
3400 debuglog.info('Terminals, with rules where they appear')
3401 debuglog.info('')
3402 terms = list(grammar.Terminals)
3403 terms.sort()
3404 for term in terms:
3405 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3407 debuglog.info('')
3408 debuglog.info('Nonterminals, with rules where they appear')
3409 debuglog.info('')
3410 nonterms = list(grammar.Nonterminals)
3411 nonterms.sort()
3412 for nonterm in nonterms:
3413 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3414 debuglog.info('')
3416 if check_recursion:
3417 unreachable = grammar.find_unreachable()
3418 for u in unreachable:
3419 errorlog.warning('Symbol %r is unreachable', u)
3421 infinite = grammar.infinite_cycles()
3422 for inf in infinite:
3423 errorlog.error('Infinite recursion detected for symbol %r', inf)
3424 errors = True
3426 unused_prec = grammar.unused_precedence()
3427 for term, assoc in unused_prec:
3428 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3429 errors = True
3431 if errors:
3432 raise YaccError('Unable to build parser')
3434 # Run the LRGeneratedTable on the grammar
3435 if debug:
3436 errorlog.debug('Generating %s tables', method)
3438 lr = LRGeneratedTable(grammar, method, debuglog)
3440 if debug:
3441 num_sr = len(lr.sr_conflicts)
3443 # Report shift/reduce and reduce/reduce conflicts
3444 if num_sr == 1:
3445 errorlog.warning('1 shift/reduce conflict')
3446 elif num_sr > 1:
3447 errorlog.warning('%d shift/reduce conflicts', num_sr)
3449 num_rr = len(lr.rr_conflicts)
3450 if num_rr == 1:
3451 errorlog.warning('1 reduce/reduce conflict')
3452 elif num_rr > 1:
3453 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3455 # Write out conflicts to the output file
3456 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3457 debuglog.warning('')
3458 debuglog.warning('Conflicts:')
3459 debuglog.warning('')
3461 for state, tok, resolution in lr.sr_conflicts:
3462 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3464 already_reported = set()
3465 for state, rule, rejected in lr.rr_conflicts:
3466 if (state, id(rule), id(rejected)) in already_reported:
3467 continue
3468 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3469 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3470 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3471 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3472 already_reported.add((state, id(rule), id(rejected)))
3474 warned_never = []
3475 for state, rule, rejected in lr.rr_conflicts:
3476 if not rejected.reduced and (rejected not in warned_never):
3477 debuglog.warning('Rule (%s) is never reduced', rejected)
3478 errorlog.warning('Rule (%s) is never reduced', rejected)
3479 warned_never.append(rejected)
3481 # Write the table file if requested
3482 if write_tables:
3483 try:
3484 lr.write_table(tabmodule, outputdir, signature)
3485 if tabmodule in sys.modules:
3486 del sys.modules[tabmodule]
3487 except IOError as e:
3488 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3490 # Write a pickled version of the tables
3491 if picklefile:
3492 try:
3493 lr.pickle_table(picklefile, signature)
3494 except IOError as e:
3495 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3497 # Build the parser
3498 lr.bind_callables(pinfo.pdict)
3499 parser = LRParser(lr, pinfo.error_func)
3501 parse = parser.parse
3502 return parser