Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/ply/yacc.py: 5%
1929 statements
coverage.py v6.5.0, created at 2022-10-12 09:01 +0000
# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman, "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what some might
# consider to be good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------
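
# Illustrative sketch (not part of this module): how a grammar is supplied to
# yacc.yacc() by writing BNF inside function docstrings, as described above.
# The token names and the 'calclex' lexer module are hypothetical.
#
#     import ply.yacc as yacc
#     from calclex import tokens      # hypothetical lexer module defining tokens
#
#     def p_expr_plus(p):
#         "expr : expr PLUS term"
#         p[0] = p[1] + p[3]
#
#     def p_expr_term(p):
#         "expr : term"
#         p[0] = p[1]
#
#     def p_term_num(p):
#         "term : NUMBER"
#         p[0] = p[1]
#
#     parser = yacc.yacc()            # builds the LR tables from this module
#     result = parser.parse("1 + 2")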
import inspect
import os.path
import re
import sys
import types
import warnings

__version__ = "3.11"
__tabversion__ = "3.10"
# -----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
# -----------------------------------------------------------------------------

yaccdebug = True  # Debugging mode. If set, yacc generates a
                  # 'parser.out' file in the current directory

debug_file = "parser.out"  # Default name of the debugging file
tab_module = "parsetab"    # Default name of the table module
default_lr = "LALR"        # Default LR table generation method

error_count = 3  # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False  # Set to True if developing yacc. This turns off optimized
                   # implementations of certain functions.

resultlimit = 40  # Size limit of results when running in debug mode.

pickle_protocol = 0  # Protocol to use when writing pickle files
# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize
# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.


class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + "\n")

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug
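
# Illustrative sketch (not part of this module): routing PLY's diagnostic
# output through a logger, per the comment above. yacc() accepts debuglog=
# and errorlog= arguments; any object with debug/info/warning/error/critical
# methods will do, so a standard logging.Logger works as well as PlyLogger.
#
#     import sys
#     log = PlyLogger(sys.stderr)
#     parser = yacc.yacc(debuglog=log, errorlog=log)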


# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self


# Exception raised for yacc-related errors
class YaccError(Exception):
    pass


# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if "\n" in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + " ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)
    return result


# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if "\n" in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))


# Panic mode error recovery support. This feature is being reworked--much of the
# code here is to offer a deprecation/backwards-compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
"""


def errok():
    warnings.warn(_warnmsg)
    return _errok()


def restart():
    warnings.warn(_warnmsg)
    return _restart()


def token():
    warnings.warn(_warnmsg)
    return _token()


# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r


# -----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
# -----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#     .type       = Grammar symbol type
#     .value      = Symbol value
#     .lineno     = Starting line number
#     .endlineno  = Ending line number (optional, set automatically)
#     .lexpos     = Starting lex position
#     .endlexpos  = Ending lex position (optional, set automatically)


class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)


# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.


class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], "lexpos", 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
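
# Illustrative sketch (not part of this module): what a grammar rule sees
# through its YaccProduction argument p. For a rule 'expr : expr PLUS term',
# p[0] is the result slot and p[1]..p[3] are the .value attributes of the
# matched symbols; the position helpers return 0 unless tracking is enabled.
#
#     def p_expr_plus(p):
#         "expr : expr PLUS term"
#         p[0] = p[1] + p[3]         # assigns .value of the result symbol
#         line = p.lineno(1)         # line number of the left 'expr'
#         span = p.linespan(1)       # (startline, endline), with tracking=True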


# -----------------------------------------------------------------------------
# == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------


class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = "$end"
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
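
    # Illustrative sketch (not part of this module): the shape of the
    # defaulted_states table. A state whose action table holds exactly one
    # entry, and whose action is a reduction (encoded as a negative rule
    # number), is recorded as defaulted. The state number below is hypothetical.
    #
    #     action = {7: {'PLUS': -3}}      # state 7 can only reduce by rule 3
    #     # set_defaulted_states() then yields  defaulted_states == {7: -3}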

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
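
    # Illustrative sketch (not part of this module): how the arguments to
    # parse() select one of the three engine variants defined below.
    #
    #     parser.parse(data)                   # fastest path: parseopt_notrack()
    #     parser.parse(data, tracking=True)    # line/position info: parseopt()
    #     parser.parse(data, debug=True)       # traced run: parsedebug(), logging
    #                                          # to a PlyLogger on sys.stderr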

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging-enabled version of parse(). All changes made to the
    # parsing engine should be made here. Optimized versions of this function
    # are automatically created by the ply/ygen.py script. This script cuts out
    # sections enclosed in markers such as this:
    #
    #     #--! DEBUG
    #     statements
    #     #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        # --! parsedebug-start
        lookahead = None               # Current lookahead symbol
        lookaheadstack = []            # Stack of lookahead symbols
        actions = self.action          # Local reference to action table (to avoid lookup on self.)
        goto = self.goto               # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions        # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)  # Production object passed to grammar rules
        errorcount = 0                 # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex

            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0, $end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug("")
            debug.debug("State : %s", state)
            # --! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()  # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = "$end"

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                # --! DEBUG
                debug.debug("Defaulted state %s: Reduce using %d", state, -t)
                # --! DEBUG

            # --! DEBUG
            debug.debug(
                "Stack : %s",
                ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(),
            )
            # --! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname   # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info(
                            "Action : Reduce rule [%s] with %s and goto state %d",
                            p.str,
                            "[" + ",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + "]",
                            goto[statestack[-1 - plen]][pname],
                        )
                    else:
                        debug.info(
                            "Action : Reduce rule [%s] with %s and goto state %d",
                            p.str,
                            [],
                            goto[statestack[-1]][pname],
                        )
                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen - 1 :]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            symstack.extend(targ[1:-1])       # Put the production slice back on the stack
                            statestack.pop()                  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            statestack.pop()                  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, "value", None)
                    # --! DEBUG
                    debug.info("Done : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t is None:

                # --! DEBUG
                debug.error(
                    "Error : %s",
                    ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(),
                )
                # --! DEBUG

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user-defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None  # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, "lexer"):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write(
                                    "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)
                                )
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != "error":
                    sym = symstack[-1]
                    if sym.type == "error":
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        # --! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, "lineno", sym.lineno)
                            sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos)
                        # --! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = "error"

                    if hasattr(lookahead, "lineno"):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, "lexpos"):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    # --! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    # --! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
        # --! parsedebug-end
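
    # Illustrative sketch (not part of this module): a p_error() handler that
    # resynchronizes on its own, as described in the error-recovery comments
    # above. It uses the parser-instance methods recommended earlier instead
    # of the deprecated module-level errok()/token()/restart(). The token name
    # 'SEMI' is hypothetical.
    #
    #     def p_error(tok):
    #         if tok is None:                 # parser reached end of input
    #             print("Parse error: unexpected EOF")
    #             return
    #         print("Syntax error at token", tok.type)
    #         # Discard tokens up to the next ';' and resume parsing
    #         while True:
    #             nxt = parser.token()
    #             if nxt is None or nxt.type == 'SEMI':
    #                 break
    #         parser.errok()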

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        # --! parseopt-start
        lookahead = None               # Current lookahead symbol
        lookaheadstack = []            # Stack of lookahead symbols
        actions = self.action          # Local reference to action table (to avoid lookup on self.)
        goto = self.goto               # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions        # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)  # Production object passed to grammar rules
        errorcount = 0                 # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex

            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0, $end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()  # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = "$end"

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname   # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen - 1 :]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            symstack.extend(targ[1:-1])       # Put the production slice back on the stack
                            statestack.pop()                  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            statestack.pop()                  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, "value", None)
                    return result

            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user-defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None  # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, "lexer"):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write(
                                    "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)
                                )
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != "error":
                    sym = symstack[-1]
                    if sym.type == "error":
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        # --! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, "lineno", sym.lineno)
                            sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos)
                        # --! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = "error"

                    if hasattr(lookahead, "lineno"):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, "lexpos"):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    # --! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    # --! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
        # --! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        # --! parseopt-notrack-start
        lookahead = None               # Current lookahead symbol
        lookaheadstack = []            # Stack of lookahead symbols
        actions = self.action          # Local reference to action table (to avoid lookup on self.)
        goto = self.goto               # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions        # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)  # Production object passed to grammar rules
        errorcount = 0                 # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex

            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0, $end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()  # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = "$end"

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname   # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen - 1 :]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            symstack.extend(targ[1:-1])       # Put the production slice back on the stack
                            statestack.pop()                  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            statestack.pop()                  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, "value", None)
                    return result

            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user-defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None  # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, "lexer"):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write(
                                    "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)
                                )
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != "error":
                    sym = symstack[-1]
                    if sym.type == "error":
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = "error"

                    if hasattr(lookahead, "lineno"):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, "lexpos"):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
        # --! parseopt-notrack-end


# -----------------------------------------------------------------------------
# === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r"^[a-zA-Z0-9_-]+$")


# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#     expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#     name     - Name of the production. For example 'expr'
#     prod     - A list of symbols on the right side ['expr', 'PLUS', 'term']
#     prec     - Production precedence level
#     number   - Production number.
#     func     - Function that executes on reduce
#     file     - File where production function is defined
#     lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#     len      - Length of the production (number of symbols on right hand side)
#     usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------


class Production(object):
    reduced = 0

    def __init__(self, number, name, prod, precedence=("right", 0), func=None, file="", line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)  # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = self.Prodnames[p.prod[n + 1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n - 1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
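
# Illustrative sketch (not part of this module): the Production built for a
# rule 'expr : expr PLUS term' handled by a function p_expr() in calc.py
# (both names hypothetical).
#
#     p = Production(number=1, name='expr', prod=['expr', 'PLUS', 'term'],
#                    func='p_expr', file='calc.py', line=42)
#     str(p)        # 'expr -> expr PLUS term'
#     len(p)        # 3
#     p.usyms       # ['expr', 'PLUS', 'term']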


# This class serves as a minimal stand-in for Production objects when
# reading table data from files. It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#     expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here are
# the basic attributes:
#
#     name       - Name of the production. For example 'expr'
#     prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#     number     - Production number.
#
#     lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term',
#                  then lr_next refers to 'expr -> expr PLUS . term'
#     lr_index   - LR item index (location of the ".") in the prod list.
#     lookaheads - LALR lookahead symbols for this item
#     len        - Length of the production (number of symbols on right hand side)
#     lr_after   - List of all productions that immediately follow
#     lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------


class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, ".")
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem(" + str(self) + ")"
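
# Illustrative sketch (not part of this module): the successive LR items
# produced from the rule 'expr : expr PLUS term' by Production.lr_item(n).
#
#     expr -> . expr PLUS term      # n = 0
#     expr -> expr . PLUS term      # n = 1
#     expr -> expr PLUS . term      # n = 2
#     expr -> expr PLUS term .      # n = 3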


# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
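
# Illustrative sketch (not part of this module): scanning from the right,
# the first symbol found in the terminals mapping is returned.
#
#     rightmost_terminal(['expr', 'PLUS', 'term'], {'PLUS': [], 'NUMBER': []})
#     # -> 'PLUS'   ('term' and 'expr' are nonterminals here)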


# -----------------------------------------------------------------------------
# === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------


class GrammarError(YaccError):
    pass


class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions. The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals["error"] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right', level) or ('nonassoc', level) or ('left', level)

        self.UsedPrecedence = set()  # Precedence rules that were actually used by the grammar.
                                     # This is only used to provide error checking and to generate
                                     # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'. level is a numeric level.
    # -----------------------------------------------------------------------------
    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], "Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal %r" % term)
        if assoc not in ["left", "right", "nonassoc"]:
            raise GrammarError("Associativity must be one of 'left', 'right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
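
    # Illustrative sketch (not part of this module): a conventional arithmetic
    # precedence table expressed as set_precedence() calls. A higher level
    # binds more tightly; these must precede any add_production() calls.
    #
    #     g = Grammar(['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'UMINUS', 'NUMBER'])
    #     g.set_precedence('PLUS',   'left', 1)
    #     g.set_precedence('MINUS',  'left', 1)
    #     g.set_precedence('TIMES',  'left', 2)
    #     g.set_precedence('DIVIDE', 'left', 2)
    #     g.set_precedence('UMINUS', 'right', 3)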

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr', 'PLUS', 'term'].
    #
    # Precedence is determined by the precedence of the right-most terminal symbol
    # or by the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------
    def add_production(self, prodname, syms, func=None, file="", line=0):

        if prodname in self.Terminals:
            raise GrammarError(
                "%s:%d: Illegal rule name %r. Already defined as a token" % (file, line, prodname)
            )
        if prodname == "error":
            raise GrammarError(
                "%s:%d: Illegal rule name %r. error is a reserved word" % (file, line, prodname)
            )
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name %r" % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if len(c) > 1:
                        raise GrammarError(
                            "%s:%d: Literal token %s in rule %r may only be a single character"
                            % (file, line, s, prodname)
                        )
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != "%prec":
                raise GrammarError("%s:%d: Illegal name %r in rule %r" % (file, line, s, prodname))

        # Determine the precedence level
        if "%prec" in syms:
            if syms[-1] == "%prec":
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file, line))
            if syms[-2] != "%prec":
                raise GrammarError(
                    "%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file, line)
                )
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of %r" % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]  # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ("right", 0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError(
                "%s:%d: Duplicate rule %s. " % (file, line, m)
                + "Previous definition at %s:%d" % (m.file, m.line)
            )

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
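
    # Illustrative sketch (not part of this module): building rules by hand,
    # continuing the precedence example above. The '%prec UMINUS' pair attaches
    # the precedence of UMINUS to the rule instead of the precedence of its
    # rightmost terminal.
    #
    #     g.add_production('expr', ['expr', 'PLUS', 'expr'])
    #     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'])
    #     g.add_production('expr', ['NUMBER'])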

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------
    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol. Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------
    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------
    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates["$end"] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != "error":
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
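
    # Illustrative sketch (not part of this module): a nonterminal that can
    # never derive a string of only terminals, which infinite_cycles() reports.
    #
    #     forever : forever PLUS forever
    #
    # Every production for 'forever' mentions 'forever' itself, so
    # terminates['forever'] never becomes True and 'forever' is returned.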

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != "error":
                    result.append((s, p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar. Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != "error" and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------
    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod
1816 # -----------------------------------------------------------------------------
1817 # unused_precedence()
1818 #
1819 # Returns a list of tuples (term,precedence) corresponding to precedence
1820 # rules that were never used by the grammar. term is the name of the terminal
1821 # on which precedence was applied and precedence is a string such as 'left' or
1822 # 'right' corresponding to the type of precedence.
1823 # -----------------------------------------------------------------------------
1825 def unused_precedence(self):
1826 unused = []
1827 for termname in self.Precedence:
1828 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1829 unused.append((termname, self.Precedence[termname][0]))
1831 return unused
1833 # -------------------------------------------------------------------------
1834 # _first()
1835 #
1836 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1837 #
1838 # During execution of compute_first(), the result may be incomplete.
1839 # Afterward (e.g., when called from compute_follow()), it will be complete.
1840 # -------------------------------------------------------------------------
1841 def _first(self, beta):
1843 # We are computing First(x1,x2,x3,...,xn)
1844 result = []
1845 for x in beta:
1846 x_produces_empty = False
1848 # Add all the non-<empty> symbols of First[x] to the result.
1849 for f in self.First[x]:
1850 if f == "<empty>":
1851 x_produces_empty = True
1852 else:
1853 if f not in result:
1854 result.append(f)
1856 if x_produces_empty:
1857 # We have to consider the next x in beta,
1858 # i.e. stay in the loop.
1859 pass
1860 else:
1861 # We don't have to consider any further symbols in beta.
1862 break
1863 else:
1864 # There was no 'break' from the loop,
1865 # so x_produces_empty was true for all x in beta,
1866 # so beta produces empty as well.
1867 result.append("<empty>")
1869 return result
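# Illustrative sketch (hypothetical rules): given
#
#     opt : <empty>
#     opt : NAME
#
# FIRST(opt) is ['NAME', '<empty>'], and _first(('opt', 'COMMA')) yields
# ['NAME', 'COMMA'], because opt can derive empty so the scan continues
# past it to COMMA.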
1871 # -------------------------------------------------------------------------
1872 # compute_first()
1873 #
1874 # Compute the value of FIRST1(X) for all symbols
1875 # -------------------------------------------------------------------------
1876 def compute_first(self):
1877 if self.First:
1878 return self.First
1880 # Terminals:
1881 for t in self.Terminals:
1882 self.First[t] = [t]
1884 self.First["$end"] = ["$end"]
1886 # Nonterminals:
1888 # Initialize to the empty set:
1889 for n in self.Nonterminals:
1890 self.First[n] = []
1892 # Then propagate symbols until no change:
1893 while True:
1894 some_change = False
1895 for n in self.Nonterminals:
1896 for p in self.Prodnames[n]:
1897 for f in self._first(p.prod):
1898 if f not in self.First[n]:
1899 self.First[n].append(f)
1900 some_change = True
1901 if not some_change:
1902 break
1904 return self.First
1906 # ---------------------------------------------------------------------
1907 # compute_follow()
1908 #
1909 # Computes all of the follow sets for every non-terminal symbol. The
1910 # follow set is the set of all symbols that might follow a given
1911 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1912 # ---------------------------------------------------------------------
1913 def compute_follow(self, start=None):
1914 # If already computed, return the result
1915 if self.Follow:
1916 return self.Follow
1918 # If first sets not computed yet, do that first.
1919 if not self.First:
1920 self.compute_first()
1922 # Add '$end' to the follow list of the start symbol
1923 for k in self.Nonterminals:
1924 self.Follow[k] = []
1926 if not start:
1927 start = self.Productions[1].name
1929 self.Follow[start] = ["$end"]
1931 while True:
1932 didadd = False
1933 for p in self.Productions[1:]:
1934 # Here is the production set
1935 for i, B in enumerate(p.prod):
1936 if B in self.Nonterminals:
1937 # Okay. We got a non-terminal in a production
1938 fst = self._first(p.prod[i + 1 :])
1939 hasempty = False
1940 for f in fst:
1941 if f != "<empty>" and f not in self.Follow[B]:
1942 self.Follow[B].append(f)
1943 didadd = True
1944 if f == "<empty>":
1945 hasempty = True
1946 if hasempty or i == (len(p.prod) - 1):
1947 # Add elements of follow(a) to follow(b)
1948 for f in self.Follow[p.name]:
1949 if f not in self.Follow[B]:
1950 self.Follow[B].append(f)
1951 didadd = True
1952 if not didadd:
1953 break
1954 return self.Follow
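# Illustrative sketch (hypothetical rules): with start symbol 'pair' and
#
#     pair : opt COMMA
#     opt  : <empty>
#     opt  : NAME
#
# this computes Follow[pair] == ['$end'] and Follow[opt] == ['COMMA'],
# since COMMA is the only symbol that can appear immediately after opt.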
1956 # -----------------------------------------------------------------------------
1957 # build_lritems()
1958 #
1959 # This function walks the list of productions and builds a complete set of the
1960 # LR items. The LR items are stored in two ways: First, they are uniquely
1961 # numbered and placed in the list _lritems. Second, a linked list of LR items
1962 # is built for each production. For example:
1963 #
1964 # E -> E PLUS E
1965 #
1966 # Creates the list
1967 #
1968 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1969 # -----------------------------------------------------------------------------
1971 def build_lritems(self):
1972 for p in self.Productions:
1973 lastlri = p
1974 i = 0
1975 lr_items = []
1976 while True:
1977 if i > len(p):
1978 lri = None
1979 else:
1980 lri = LRItem(p, i)
1981 # Precompute the list of productions immediately following
1982 try:
1983 lri.lr_after = self.Prodnames[lri.prod[i + 1]]
1984 except (IndexError, KeyError):
1985 lri.lr_after = []
1986 try:
1987 lri.lr_before = lri.prod[i - 1]
1988 except IndexError:
1989 lri.lr_before = None
1991 lastlri.lr_next = lri
1992 if not lri:
1993 break
1994 lr_items.append(lri)
1995 lastlri = lri
1996 i += 1
1997 p.lr_items = lr_items
2000# -----------------------------------------------------------------------------
2001# == Class LRTable ==
2002#
2003# This class represents a basic table of LR parsing information.
2004# Methods for generating the tables are not defined here. They are defined
2005# in the derived class LRGeneratedTable.
2006# -----------------------------------------------------------------------------
2009class VersionError(YaccError):
2010 pass
2013class LRTable(object):
2014 def __init__(self):
2015 self.lr_action = None
2016 self.lr_goto = None
2017 self.lr_productions = None
2018 self.lr_method = None
2020 def read_table(self, module):
2021 if isinstance(module, types.ModuleType):
2022 parsetab = module
2023 else:
2024 exec("import %s" % module)
2025 parsetab = sys.modules[module]
2027 if parsetab._tabversion != __tabversion__:
2028 raise VersionError("yacc table file version is out of date")
2030 self.lr_action = parsetab._lr_action
2031 self.lr_goto = parsetab._lr_goto
2033 self.lr_productions = []
2034 for p in parsetab._lr_productions:
2035 self.lr_productions.append(MiniProduction(*p))
2037 self.lr_method = parsetab._lr_method
2038 return parsetab._lr_signature
2040 def read_pickle(self, filename):
2041 try:
2042 import cPickle as pickle
2043 except ImportError:
2044 import pickle
2046 if not os.path.exists(filename):
2047 raise ImportError
2049 in_f = open(filename, "rb")
2051 tabversion = pickle.load(in_f)
2052 if tabversion != __tabversion__:
2053 raise VersionError("yacc table file version is out of date")
2054 self.lr_method = pickle.load(in_f)
2055 signature = pickle.load(in_f)
2056 self.lr_action = pickle.load(in_f)
2057 self.lr_goto = pickle.load(in_f)
2058 productions = pickle.load(in_f)
2060 self.lr_productions = []
2061 for p in productions:
2062 self.lr_productions.append(MiniProduction(*p))
2064 in_f.close()
2065 return signature
2067 # Bind all production function names to callable objects in pdict
2068 def bind_callables(self, pdict):
2069 for p in self.lr_productions:
2070 p.bind(pdict)
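# Sketch of typical use of this class (module and file names are
# hypothetical):
#
#     lr = LRTable()
#     signature = lr.read_table('parsetab')   # or lr.read_pickle('parser.p')
#     lr.bind_callables(pdict)                # pdict maps names to p_* functions
#
# Both readers raise VersionError for stale tables, which yacc() below
# treats as a cache miss before regenerating the tables.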
2073# -----------------------------------------------------------------------------
2074# === LR Generator ===
2075#
2076# The following classes and functions are used to generate LR parsing tables
2077# from a grammar.
2078# -----------------------------------------------------------------------------
2080# -----------------------------------------------------------------------------
2081# digraph()
2082# traverse()
2083#
2084# The following two functions are used to compute set-valued functions
2085# of the form:
2086#
2087# F(x) = F'(x) U U{F(y) | x R y}
2088#
2089# This is used to compute the values of Read() sets as well as FOLLOW sets
2090# in LALR(1) generation.
2091#
2092# Inputs: X - An input set
2093# R - A relation
2094# FP - Set-valued function
2095# ------------------------------------------------------------------------------
2098def digraph(X, R, FP):
2099 N = {}
2100 for x in X:
2101 N[x] = 0
2102 stack = []
2103 F = {}
2104 for x in X:
2105 if N[x] == 0:
2106 traverse(x, N, stack, F, X, R, FP)
2107 return F
2110def traverse(x, N, stack, F, X, R, FP):
2111 stack.append(x)
2112 d = len(stack)
2113 N[x] = d
2114 F[x] = FP(x) # F(X) <- F'(x)
2116 rel = R(x) # Get y's related to x
2117 for y in rel:
2118 if N[y] == 0:
2119 traverse(y, N, stack, F, X, R, FP)
2120 N[x] = min(N[x], N[y])
2121 for a in F.get(y, []):
2122 if a not in F[x]:
2123 F[x].append(a)
2124 if N[x] == d:
2125 N[stack[-1]] = MAXINT
2126 F[stack[-1]] = F[x]
2127 element = stack.pop()
2128 while element != x:
2129 N[stack[-1]] = MAXINT
2130 F[stack[-1]] = F[x]
2131 element = stack.pop()
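# Minimal sketch of digraph()/traverse() on hand-built inputs (all names
# here are made up for illustration):
#
#     X  = ['a', 'b']
#     R  = lambda x: ['b'] if x == 'a' else []       # the relation: a R b
#     FP = lambda x: {'a': ['t1'], 'b': ['t2']}[x]   # F'(x)
#     F  = digraph(X, R, FP)
#     # F == {'a': ['t1', 't2'], 'b': ['t2']}: F(a) folds in F(b) via a R b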
2134class LALRError(YaccError):
2135 pass
2138# -----------------------------------------------------------------------------
2139# == LRGeneratedTable ==
2140#
2141# This class implements the LR table generation algorithm. There are no
2142# public methods except for write()
2143# -----------------------------------------------------------------------------
2146class LRGeneratedTable(LRTable):
2147 def __init__(self, grammar, method="LALR", log=None):
2148 if method not in ["SLR", "LALR"]:
2149 raise LALRError("Unsupported method %s" % method)
2151 self.grammar = grammar
2152 self.lr_method = method
2154 # Set up the logger
2155 if not log:
2156 log = NullLogger()
2157 self.log = log
2159 # Internal attributes
2160 self.lr_action = {} # Action table
2161 self.lr_goto = {} # Goto table
2162 self.lr_productions = grammar.Productions # Copy of grammar Production array
2163 self.lr_goto_cache = {} # Cache of computed gotos
2164 self.lr0_cidhash = {} # Cache of closures
2166 self._add_count = 0 # Internal counter used to detect cycles
2168 # Diagnostic information filled in by the table generator
2169 self.sr_conflict = 0
2170 self.rr_conflict = 0
2171 self.conflicts = [] # List of conflicts
2173 self.sr_conflicts = []
2174 self.rr_conflicts = []
2176 # Build the tables
2177 self.grammar.build_lritems()
2178 self.grammar.compute_first()
2179 self.grammar.compute_follow()
2180 self.lr_parse_table()
2182 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2184 def lr0_closure(self, I):
2185 self._add_count += 1
2187 # Add everything in I to J
2188 J = I[:]
2189 didadd = True
2190 while didadd:
2191 didadd = False
2192 for j in J:
2193 for x in j.lr_after:
2194 if getattr(x, "lr0_added", 0) == self._add_count:
2195 continue
2196 # Add B --> .G to J
2197 J.append(x.lr_next)
2198 x.lr0_added = self._add_count
2199 didadd = True
2201 return J
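# Sketch: for a grammar containing 'expr : NUMBER', closing over the start
# item gives
#
#     lr0_closure([S' -> . expr]) == [S' -> . expr, expr -> . NUMBER]
#
# because a dot in front of a non-terminal pulls in all of its productions.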
2203 # Compute the LR(0) goto function goto(I,X) where I is a set
2204 # of LR(0) items and X is a grammar symbol. This function is written
2205 # in a way that guarantees uniqueness of the generated goto sets
2206 # (i.e. the same goto set will never be returned as two different Python
2207 # objects). With uniqueness, we can later do fast set comparisons using
2208 # id(obj) instead of element-wise comparison.
2210 def lr0_goto(self, I, x):
2211 # First we look for a previously cached entry
2212 g = self.lr_goto_cache.get((id(I), x))
2213 if g:
2214 return g
2216 # Now we generate the goto set in a way that guarantees uniqueness
2217 # of the result
2219 s = self.lr_goto_cache.get(x)
2220 if not s:
2221 s = {}
2222 self.lr_goto_cache[x] = s
2224 gs = []
2225 for p in I:
2226 n = p.lr_next
2227 if n and n.lr_before == x:
2228 s1 = s.get(id(n))
2229 if not s1:
2230 s1 = {}
2231 s[id(n)] = s1
2232 gs.append(n)
2233 s = s1
2234 g = s.get("$end")
2235 if not g:
2236 if gs:
2237 g = self.lr0_closure(gs)
2238 s["$end"] = g
2239 else:
2240 s["$end"] = gs
2241 self.lr_goto_cache[(id(I), x)] = g
2242 return g
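# Continuing the sketch above, with I = [S' -> . expr, expr -> . NUMBER]:
#
#     lr0_goto(I, 'expr')   == [S' -> expr .]
#     lr0_goto(I, 'NUMBER') == [expr -> NUMBER .]
#
# and repeated calls with the same I and x return the identical object,
# which is what makes the id()-based hashing in lr0_items() work.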
2244 # Compute the LR(0) sets-of-items function
2245 def lr0_items(self):
2246 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2247 i = 0
2248 for I in C:
2249 self.lr0_cidhash[id(I)] = i
2250 i += 1
2252 # Loop over the items in C and each grammar symbol
2253 i = 0
2254 while i < len(C):
2255 I = C[i]
2256 i += 1
2258 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2259 asyms = {}
2260 for ii in I:
2261 for s in ii.usyms:
2262 asyms[s] = None
2264 for x in asyms:
2265 g = self.lr0_goto(I, x)
2266 if not g or id(g) in self.lr0_cidhash:
2267 continue
2268 self.lr0_cidhash[id(g)] = len(C)
2269 C.append(g)
2271 return C
2273 # -----------------------------------------------------------------------------
2274 # ==== LALR(1) Parsing ====
2275 #
2276 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2277 # relying upon Follow() sets when performing reductions, a more selective
2278 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2279 # Thus, we mainly just have to focus on calculating the lookahead sets.
2280 #
2281 # The method used here is due to DeRemer and Pennello (1982).
2282 #
2283 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2284 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2285 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2286 #
2287 # Further details can also be found in:
2288 #
2289 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2290 # McGraw-Hill Book Company, (1985).
2291 #
2292 # -----------------------------------------------------------------------------
2294 # -----------------------------------------------------------------------------
2295 # compute_nullable_nonterminals()
2296 #
2297 # Creates a set containing all of the non-terminals that can derive an
2298 # empty string.
2299 # -----------------------------------------------------------------------------
2301 def compute_nullable_nonterminals(self):
2302 nullable = set()
2303 num_nullable = 0
2304 while True:
2305 for p in self.grammar.Productions[1:]:
2306 if p.len == 0:
2307 nullable.add(p.name)
2308 continue
2309 for t in p.prod:
2310 if t not in nullable:
2311 break
2312 else:
2313 nullable.add(p.name)
2314 if len(nullable) == num_nullable:
2315 break
2316 num_nullable = len(nullable)
2317 return nullable
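# Sketch (hypothetical rules): with
#
#     opt  : <empty>
#     opt  : NAME
#     pair : opt opt
#
# the result is {'opt', 'pair'}: opt is directly nullable, and pair follows
# because every symbol on its right-hand side is nullable.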
2319 # -----------------------------------------------------------------------------
2320 # find_nonterminal_transitions(C)
2321 #
2322 # Given a set of LR(0) items, this function finds all of the non-terminal
2323 # transitions. These are transitions in which a dot appears immediately before
2324 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2325 # is the state number and N is the nonterminal symbol.
2326 #
2327 # The input C is the set of LR(0) items.
2328 # -----------------------------------------------------------------------------
2330 def find_nonterminal_transitions(self, C):
2331 trans = []
2332 for stateno, state in enumerate(C):
2333 for p in state:
2334 if p.lr_index < p.len - 1:
2335 t = (stateno, p.prod[p.lr_index + 1])
2336 if t[1] in self.grammar.Nonterminals:
2337 if t not in trans:
2338 trans.append(t)
2339 return trans
2341 # -----------------------------------------------------------------------------
2342 # dr_relation()
2343 #
2344 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2345 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2346 #
2347 # Returns a list of terminals.
2348 # -----------------------------------------------------------------------------
2350 def dr_relation(self, C, trans, nullable):
2351 state, N = trans
2352 terms = []
2354 g = self.lr0_goto(C[state], N)
2355 for p in g:
2356 if p.lr_index < p.len - 1:
2357 a = p.prod[p.lr_index + 1]
2358 if a in self.grammar.Terminals:
2359 if a not in terms:
2360 terms.append(a)
2362 # This extra bit is to handle the start state
2363 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2364 terms.append("$end")
2366 return terms
2368 # -----------------------------------------------------------------------------
2369 # reads_relation()
2370 #
2371 # Computes the READS() relation (p,A) READS (t,C).
2372 # -----------------------------------------------------------------------------
2374 def reads_relation(self, C, trans, empty):
2375 # Look for empty transitions
2376 rel = []
2377 state, N = trans
2379 g = self.lr0_goto(C[state], N)
2380 j = self.lr0_cidhash.get(id(g), -1)
2381 for p in g:
2382 if p.lr_index < p.len - 1:
2383 a = p.prod[p.lr_index + 1]
2384 if a in empty:
2385 rel.append((j, a))
2387 return rel
2389 # -----------------------------------------------------------------------------
2390 # compute_lookback_includes()
2391 #
2392 # Determines the lookback and includes relations
2393 #
2394 # LOOKBACK:
2395 #
2396 # This relation is determined by running the LR(0) state machine forward.
2397 # For example, starting with a production "N : . A B C", we run it forward
2398 # to obtain "N : A B C ." We then build a relationship between this final
2399 # state and the starting state. These relationships are stored in a dictionary
2400 # lookdict.
2401 #
2402 # INCLUDES:
2403 #
2404 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2405 #
2406 # This relation is used to determine non-terminal transitions that occur
2407 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2408 # if the following holds:
2409 #
2410 # B -> LAT, where T -> epsilon and p' -L-> p
2411 #
2412 # L is essentially a prefix (which may be empty), T is a suffix that must be
2413 # able to derive an empty string. State p' must lead to state p with the string L.
2414 #
2415 # -----------------------------------------------------------------------------
2417 def compute_lookback_includes(self, C, trans, nullable):
2418 lookdict = {} # Dictionary of lookback relations
2419 includedict = {} # Dictionary of include relations
2421 # Make a dictionary of non-terminal transitions
2422 dtrans = {}
2423 for t in trans:
2424 dtrans[t] = 1
2426 # Loop over all transitions and compute lookbacks and includes
2427 for state, N in trans:
2428 lookb = []
2429 includes = []
2430 for p in C[state]:
2431 if p.name != N:
2432 continue
2434 # Okay, we have a name match. We now follow the production all the way
2435 # through the state machine until we get the . on the right hand side
2437 lr_index = p.lr_index
2438 j = state
2439 while lr_index < p.len - 1:
2440 lr_index = lr_index + 1
2441 t = p.prod[lr_index]
2443 # Check to see if this symbol and state are a non-terminal transition
2444 if (j, t) in dtrans:
2445 # Yes. Okay, there is some chance that this is an includes relation;
2446 # the only way to know for certain is to check whether the rest of the
2447 # production derives empty.
2449 li = lr_index + 1
2450 while li < p.len:
2451 if p.prod[li] in self.grammar.Terminals:
2452 break # No, forget it
2453 if p.prod[li] not in nullable:
2454 break
2455 li = li + 1
2456 else:
2457 # Appears to be a relation between (j,t) and (state,N)
2458 includes.append((j, t))
2460 g = self.lr0_goto(C[j], t) # Go to next set
2461 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2463 # When we get here, j is the final state, now we have to locate the production
2464 for r in C[j]:
2465 if r.name != p.name:
2466 continue
2467 if r.len != p.len:
2468 continue
2469 i = 0
2470 # This loop is comparing a production ". A B C" with "A B C ."
2471 while i < r.lr_index:
2472 if r.prod[i] != p.prod[i + 1]:
2473 break
2474 i = i + 1
2475 else:
2476 lookb.append((j, r))
2477 for i in includes:
2478 if i not in includedict:
2479 includedict[i] = []
2480 includedict[i].append((state, N))
2481 lookdict[(state, N)] = lookb
2483 return lookdict, includedict
2485 # -----------------------------------------------------------------------------
2486 # compute_read_sets()
2487 #
2488 # Given a set of LR(0) items, this function computes the read sets.
2489 #
2490 # Inputs: C = Set of LR(0) items
2491 # ntrans = Set of nonterminal transitions
2492 # nullable = Set of nullable non-terminals
2493 #
2494 # Returns a dictionary containing the read sets
2495 # -----------------------------------------------------------------------------
2497 def compute_read_sets(self, C, ntrans, nullable):
2498 FP = lambda x: self.dr_relation(C, x, nullable)
2499 R = lambda x: self.reads_relation(C, x, nullable)
2500 F = digraph(ntrans, R, FP)
2501 return F
2503 # -----------------------------------------------------------------------------
2504 # compute_follow_sets()
2505 #
2506 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2507 # and an include set, this function computes the follow sets
2508 #
2509 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2510 #
2511 # Inputs:
2512 # ntrans = Set of nonterminal transitions
2513 # readsets = Readset (previously computed)
2514 # inclsets = Include sets (previously computed)
2515 #
2516 # Returns a dictionary containing the follow sets
2517 # -----------------------------------------------------------------------------
2519 def compute_follow_sets(self, ntrans, readsets, inclsets):
2520 FP = lambda x: readsets[x]
2521 R = lambda x: inclsets.get(x, [])
2522 F = digraph(ntrans, R, FP)
2523 return F
2525 # -----------------------------------------------------------------------------
2526 # add_lookaheads()
2527 #
2528 # Attaches the lookahead symbols to grammar rules.
2529 #
2530 # Inputs: lookbacks - Set of lookback relations
2531 # followset - Computed follow set
2532 #
2533 # This function directly attaches the lookaheads to productions contained
2534 # in the lookbacks set
2535 # -----------------------------------------------------------------------------
2537 def add_lookaheads(self, lookbacks, followset):
2538 for trans, lb in lookbacks.items():
2539 # Loop over productions in lookback
2540 for state, p in lb:
2541 if state not in p.lookaheads:
2542 p.lookaheads[state] = []
2543 f = followset.get(trans, [])
2544 for a in f:
2545 if a not in p.lookaheads[state]:
2546 p.lookaheads[state].append(a)
2548 # -----------------------------------------------------------------------------
2549 # add_lalr_lookaheads()
2550 #
2551 # This function does all of the work of adding lookahead information for use
2552 # with LALR parsing
2553 # -----------------------------------------------------------------------------
2555 def add_lalr_lookaheads(self, C):
2556 # Determine all of the nullable nonterminals
2557 nullable = self.compute_nullable_nonterminals()
2559 # Find all non-terminal transitions
2560 trans = self.find_nonterminal_transitions(C)
2562 # Compute read sets
2563 readsets = self.compute_read_sets(C, trans, nullable)
2565 # Compute lookback/includes relations
2566 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2568 # Compute LALR FOLLOW sets
2569 followsets = self.compute_follow_sets(trans, readsets, included)
2571 # Add all of the lookaheads
2572 self.add_lookaheads(lookd, followsets)
2574 # -----------------------------------------------------------------------------
2575 # lr_parse_table()
2576 #
2577 # This function constructs the parse tables for SLR or LALR
2578 # -----------------------------------------------------------------------------
2579 def lr_parse_table(self):
2580 Productions = self.grammar.Productions
2581 Precedence = self.grammar.Precedence
2582 goto = self.lr_goto # Goto array
2583 action = self.lr_action # Action array
2584 log = self.log # Logger for output
2586 actionp = {} # Action production array (temporary)
2588 log.info("Parsing method: %s", self.lr_method)
2590 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2591 # This determines the number of states
2593 C = self.lr0_items()
2595 if self.lr_method == "LALR":
2596 self.add_lalr_lookaheads(C)
2598 # Build the parser table, state by state
2599 st = 0
2600 for I in C:
2601 # Loop over each production in I
2602 actlist = [] # List of actions
2603 st_action = {}
2604 st_actionp = {}
2605 st_goto = {}
2606 log.info("")
2607 log.info("state %d", st)
2608 log.info("")
2609 for p in I:
2610 log.info(" (%d) %s", p.number, p)
2611 log.info("")
2613 for p in I:
2614 if p.len == p.lr_index + 1:
2615 if p.name == "S'":
2616 # Start symbol. Accept!
2617 st_action["$end"] = 0
2618 st_actionp["$end"] = p
2619 else:
2620 # We are at the end of a production. Reduce!
2621 if self.lr_method == "LALR":
2622 laheads = p.lookaheads[st]
2623 else:
2624 laheads = self.grammar.Follow[p.name]
2625 for a in laheads:
2626 actlist.append((a, p, "reduce using rule %d (%s)" % (p.number, p)))
2627 r = st_action.get(a)
2628 if r is not None:
2629 # Whoa. Have a shift/reduce or reduce/reduce conflict
2630 if r > 0:
2631 # Need to decide on shift or reduce here
2632 # By default we favor shifting. Need to add
2633 # some precedence rules here.
2635 # Shift precedence comes from the token
2636 sprec, slevel = Precedence.get(a, ("right", 0))
2638 # Reduce precedence comes from rule being reduced (p)
2639 rprec, rlevel = Productions[p.number].prec
2641 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == "left")):
2642 # We really need to reduce here.
2643 st_action[a] = -p.number
2644 st_actionp[a] = p
2645 if not slevel and not rlevel:
2646 log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
2647 self.sr_conflicts.append((st, a, "reduce"))
2648 Productions[p.number].reduced += 1
2649 elif (slevel == rlevel) and (rprec == "nonassoc"):
2650 st_action[a] = None
2651 else:
2652 # Hmmm. Guess we'll keep the shift
2653 if not rlevel:
2654 log.info(" ! shift/reduce conflict for %s resolved as shift", a)
2655 self.sr_conflicts.append((st, a, "shift"))
2656 elif r < 0:
2657 # Reduce/reduce conflict. In this case, we favor the rule
2658 # that was defined first in the grammar file
2659 oldp = Productions[-r]
2660 pp = Productions[p.number]
2661 if oldp.line > pp.line:
2662 st_action[a] = -p.number
2663 st_actionp[a] = p
2664 chosenp, rejectp = pp, oldp
2665 Productions[p.number].reduced += 1
2666 Productions[oldp.number].reduced -= 1
2667 else:
2668 chosenp, rejectp = oldp, pp
2669 self.rr_conflicts.append((st, chosenp, rejectp))
2670 log.info(
2671 " ! reduce/reduce conflict for %s resolved using rule %d (%s)",
2672 a,
2673 st_actionp[a].number,
2674 st_actionp[a],
2675 )
2676 else:
2677 raise LALRError("Unknown conflict in state %d" % st)
2678 else:
2679 st_action[a] = -p.number
2680 st_actionp[a] = p
2681 Productions[p.number].reduced += 1
2682 else:
2683 i = p.lr_index
2684 a = p.prod[i + 1] # Get symbol right after the "."
2685 if a in self.grammar.Terminals:
2686 g = self.lr0_goto(I, a)
2687 j = self.lr0_cidhash.get(id(g), -1)
2688 if j >= 0:
2689 # We are in a shift state
2690 actlist.append((a, p, "shift and go to state %d" % j))
2691 r = st_action.get(a)
2692 if r is not None:
2693 # Whoa. Have a shift/reduce or shift/shift conflict
2694 if r > 0:
2695 if r != j:
2696 raise LALRError("Shift/shift conflict in state %d" % st)
2697 elif r < 0:
2698 # Do a precedence check.
2699 # - if precedence of reduce rule is higher, we reduce.
2700 # - if precedence of reduce is same and left assoc, we reduce.
2701 # - otherwise we shift
2703 # Shift precedence comes from the token
2704 sprec, slevel = Precedence.get(a, ("right", 0))
2706 # Reduce precedence comes from the rule that could have been reduced
2707 rprec, rlevel = Productions[st_actionp[a].number].prec
2709 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == "right")):
2710 # We decide to shift here... highest precedence to shift
2711 Productions[st_actionp[a].number].reduced -= 1
2712 st_action[a] = j
2713 st_actionp[a] = p
2714 if not rlevel:
2715 log.info(" ! shift/reduce conflict for %s resolved as shift", a)
2716 self.sr_conflicts.append((st, a, "shift"))
2717 elif (slevel == rlevel) and (rprec == "nonassoc"):
2718 st_action[a] = None
2719 else:
2720 # Hmmm. Guess we'll keep the reduce
2721 if not slevel and not rlevel:
2722 log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
2723 self.sr_conflicts.append((st, a, "reduce"))
2725 else:
2726 raise LALRError("Unknown conflict in state %d" % st)
2727 else:
2728 st_action[a] = j
2729 st_actionp[a] = p
2731 # Print the actions associated with each terminal
2732 _actprint = {}
2733 for a, p, m in actlist:
2734 if a in st_action:
2735 if p is st_actionp[a]:
2736 log.info(" %-15s %s", a, m)
2737 _actprint[(a, m)] = 1
2738 log.info("")
2739 # Print the actions that were not used. (debugging)
2740 not_used = 0
2741 for a, p, m in actlist:
2742 if a in st_action:
2743 if p is not st_actionp[a]:
2744 if (a, m) not in _actprint:
2745 log.debug(" ! %-15s [ %s ]", a, m)
2746 not_used = 1
2747 _actprint[(a, m)] = 1
2748 if not_used:
2749 log.debug("")
2751 # Construct the goto table for this state
2753 nkeys = {}
2754 for ii in I:
2755 for s in ii.usyms:
2756 if s in self.grammar.Nonterminals:
2757 nkeys[s] = None
2758 for n in nkeys:
2759 g = self.lr0_goto(I, n)
2760 j = self.lr0_cidhash.get(id(g), -1)
2761 if j >= 0:
2762 st_goto[n] = j
2763 log.info(" %-30s shift and go to state %d", n, j)
2765 action[st] = st_action
2766 actionp[st] = st_actionp
2767 goto[st] = st_goto
2768 st += 1
2770 # -----------------------------------------------------------------------------
2771 # write()
2772 #
2773 # This function writes the LR parsing tables to a file
2774 # -----------------------------------------------------------------------------
2776 def write_table(self, tabmodule, outputdir="", signature=""):
2777 if isinstance(tabmodule, types.ModuleType):
2778 raise IOError("Won't overwrite existing tabmodule")
2780 basemodulename = tabmodule.split(".")[-1]
2781 filename = os.path.join(outputdir, basemodulename) + ".py"
2782 try:
2783 f = open(filename, "w")
2785 f.write(
2786 """
2787# %s
2788# This file is automatically generated. Do not edit.
2789# pylint: disable=W,C,R
2790_tabversion = %r
2792_lr_method = %r
2794_lr_signature = %r
2795 """
2796 % (os.path.basename(filename), __tabversion__, self.lr_method, signature)
2797 )
2799 # Change smaller to 0 to go back to original tables
2800 smaller = 1
2802 # Factor out names to try to make the tables smaller
2803 if smaller:
2804 items = {}
2806 for s, nd in self.lr_action.items():
2807 for name, v in nd.items():
2808 i = items.get(name)
2809 if not i:
2810 i = ([], [])
2811 items[name] = i
2812 i[0].append(s)
2813 i[1].append(v)
2815 f.write("\n_lr_action_items = {")
2816 for k, v in items.items():
2817 f.write("%r:([" % k)
2818 for i in v[0]:
2819 f.write("%r," % i)
2820 f.write("],[")
2821 for i in v[1]:
2822 f.write("%r," % i)
2824 f.write("]),")
2825 f.write("}\n")
2827 f.write(
2828 """
2829_lr_action = {}
2830for _k, _v in _lr_action_items.items():
2831 for _x, _y in zip(_v[0], _v[1]):
2832 if _x not in _lr_action: _lr_action[_x] = {}
2833 _lr_action[_x][_k] = _y
2834del _lr_action_items
2835"""
2836 )
2838 else:
2839 f.write("\n_lr_action = { ")
2840 for k, v in self.lr_action.items():
2841 f.write("(%r,%r):%r," % (k[0], k[1], v))
2842 f.write("}\n")
2844 if smaller:
2845 # Factor out names to try to make the tables smaller
2846 items = {}
2848 for s, nd in self.lr_goto.items():
2849 for name, v in nd.items():
2850 i = items.get(name)
2851 if not i:
2852 i = ([], [])
2853 items[name] = i
2854 i[0].append(s)
2855 i[1].append(v)
2857 f.write("\n_lr_goto_items = {")
2858 for k, v in items.items():
2859 f.write("%r:([" % k)
2860 for i in v[0]:
2861 f.write("%r," % i)
2862 f.write("],[")
2863 for i in v[1]:
2864 f.write("%r," % i)
2866 f.write("]),")
2867 f.write("}\n")
2869 f.write(
2870 """
2871_lr_goto = {}
2872for _k, _v in _lr_goto_items.items():
2873 for _x, _y in zip(_v[0], _v[1]):
2874 if _x not in _lr_goto: _lr_goto[_x] = {}
2875 _lr_goto[_x][_k] = _y
2876del _lr_goto_items
2877"""
2878 )
2879 else:
2880 f.write("\n_lr_goto = { ")
2881 for k, v in self.lr_goto.items():
2882 f.write("(%r,%r):%r," % (k[0], k[1], v))
2883 f.write("}\n")
2885 # Write production table
2886 f.write("_lr_productions = [\n")
2887 for p in self.lr_productions:
2888 if p.func:
2889 f.write(
2890 " (%r,%r,%d,%r,%r,%d),\n"
2891 % (p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)
2892 )
2893 else:
2894 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p), p.name, p.len))
2895 f.write("]\n")
2896 f.close()
2898 except IOError:
2899 raise
2901 # -----------------------------------------------------------------------------
2902 # pickle_table()
2903 #
2904 # This function pickles the LR parsing tables to a supplied file object
2905 # -----------------------------------------------------------------------------
2907 def pickle_table(self, filename, signature=""):
2908 try:
2909 import cPickle as pickle
2910 except ImportError:
2911 import pickle
2912 with open(filename, "wb") as outf:
2913 pickle.dump(__tabversion__, outf, pickle_protocol)
2914 pickle.dump(self.lr_method, outf, pickle_protocol)
2915 pickle.dump(signature, outf, pickle_protocol)
2916 pickle.dump(self.lr_action, outf, pickle_protocol)
2917 pickle.dump(self.lr_goto, outf, pickle_protocol)
2919 outp = []
2920 for p in self.lr_productions:
2921 if p.func:
2922 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2923 else:
2924 outp.append((str(p), p.name, p.len, None, None, None))
2925 pickle.dump(outp, outf, pickle_protocol)
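# Sketch: this is the path taken by yacc(picklefile='parser.p') instead of
# write_table(); read_pickle() above is its inverse. For example (file name
# hypothetical):
#
#     table = LRGeneratedTable(grammar)     # tables are built in __init__
#     table.pickle_table('parser.p', signature=sig)   # sig from ParserReflect.signature()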
2928# -----------------------------------------------------------------------------
2929# === INTROSPECTION ===
2930#
2931# The following functions and classes are used to implement the PLY
2932# introspection features followed by the yacc() function itself.
2933# -----------------------------------------------------------------------------
2935# -----------------------------------------------------------------------------
2936# get_caller_module_dict()
2937#
2938# This function returns a dictionary containing all of the symbols defined in
2939# the frame of a caller, `levels` frames up the call stack. This is used to get
2940# the environment associated with the yacc() call if none was provided.
2941# -----------------------------------------------------------------------------
2944def get_caller_module_dict(levels):
2945 f = sys._getframe(levels)
2946 ldict = f.f_globals.copy()
2947 if f.f_globals != f.f_locals:
2948 ldict.update(f.f_locals)
2949 return ldict
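# Sketch of the effect (hypothetical caller):
#
#     def build():
#         start = 'expr'
#         return get_caller_module_dict(1)   # build's globals plus 'start'
#
# With levels=1 the frame examined is build() itself, so the returned dict
# is the module globals overlaid with build's locals.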
2952# -----------------------------------------------------------------------------
2953# parse_grammar()
2954#
2955# This takes a raw grammar rule string and parses it into production data
2956# -----------------------------------------------------------------------------
2957def parse_grammar(doc, file, line):
2958 grammar = []
2959 # Split the doc string into lines
2960 pstrings = doc.splitlines()
2961 lastp = None
2962 dline = line
2963 for ps in pstrings:
2964 dline += 1
2965 p = ps.split()
2966 if not p:
2967 continue
2968 try:
2969 if p[0] == "|":
2970 # This is a continuation of a previous rule
2971 if not lastp:
2972 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2973 prodname = lastp
2974 syms = p[1:]
2975 else:
2976 prodname = p[0]
2977 lastp = prodname
2978 syms = p[2:]
2979 assign = p[1]
2980 if assign != ":" and assign != "::=":
2981 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2983 grammar.append((file, dline, prodname, syms))
2984 except SyntaxError:
2985 raise
2986 except Exception:
2987 raise SyntaxError("%s:%d: Syntax error in rule %r" % (file, dline, ps.strip()))
2989 return grammar
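# Sketch: for a rule function whose docstring reads (hypothetical rule)
#
#     expr : expr PLUS term
#          | term
#
# this returns
#
#     [(file, line+1, 'expr', ['expr', 'PLUS', 'term']),
#      (file, line+2, 'expr', ['term'])]
#
# where the '|' continuation line reuses the most recent production name.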
2992# -----------------------------------------------------------------------------
2993# ParserReflect()
2994#
2995# This class represents information extracted for building a parser including
2996# start symbol, error function, tokens, precedence list, action functions,
2997# etc.
2998# -----------------------------------------------------------------------------
2999class ParserReflect(object):
3000 def __init__(self, pdict, log=None):
3001 self.pdict = pdict
3002 self.start = None
3003 self.error_func = None
3004 self.tokens = None
3005 self.modules = set()
3006 self.grammar = []
3007 self.error = False
3009 if log is None:
3010 self.log = PlyLogger(sys.stderr)
3011 else:
3012 self.log = log
3014 # Get all of the basic information
3015 def get_all(self):
3016 self.get_start()
3017 self.get_error_func()
3018 self.get_tokens()
3019 self.get_precedence()
3020 self.get_pfunctions()
3022 # Validate all of the information
3023 def validate_all(self):
3024 self.validate_start()
3025 self.validate_error_func()
3026 self.validate_tokens()
3027 self.validate_precedence()
3028 self.validate_pfunctions()
3029 self.validate_modules()
3030 return self.error
3032 # Compute a signature over the grammar
3033 def signature(self):
3034 parts = []
3035 try:
3036 if self.start:
3037 parts.append(self.start)
3038 if self.prec:
3039 parts.append("".join(["".join(p) for p in self.prec]))
3040 if self.tokens:
3041 parts.append(" ".join(self.tokens))
3042 for f in self.pfuncs:
3043 if f[3]:
3044 parts.append(f[3])
3045 except (TypeError, ValueError):
3046 pass
3047 return "".join(parts)
3049 # -----------------------------------------------------------------------------
3050 # validate_modules()
3051 #
3052 # This method checks to see if there are duplicated p_rulename() functions
3053 # in the parser module file. Without this function, it is really easy for
3054 # users to make mistakes by cutting and pasting code fragments (and it's a real
3055 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
3056 # we just do a little regular expression pattern matching of def statements
3057 # to try and detect duplicates.
3058 # -----------------------------------------------------------------------------
3060 def validate_modules(self):
3061 # Match def p_funcname(
3062 fre = re.compile(r"\s*def\s+(p_[a-zA-Z_0-9]*)\(")
3064 for module in self.modules:
3065 try:
3066 lines, linen = inspect.getsourcelines(module)
3067 except IOError:
3068 continue
3070 counthash = {}
3071 for linen, line in enumerate(lines):
3072 linen += 1
3073 m = fre.match(line)
3074 if m:
3075 name = m.group(1)
3076 prev = counthash.get(name)
3077 if not prev:
3078 counthash[name] = linen
3079 else:
3080 filename = inspect.getsourcefile(module)
3081 self.log.warning(
3082 "%s:%d: Function %s redefined. Previously defined on line %d",
3083 filename,
3084 linen,
3085 name,
3086 prev,
3087 )
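# Sketch of what trips this check: pasting a rule twice in one module,
#
#     def p_term(p):
#         'term : NUMBER'
#
#     def p_term(p):
#         'term : NAME'
#
# logs "Function p_term redefined. Previously defined on line ...", since
# the regular expression above matches both def lines.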
3089 # Get the start symbol
3090 def get_start(self):
3091 self.start = self.pdict.get("start")
3093 # Validate the start symbol
3094 def validate_start(self):
3095 if self.start is not None:
3096 if not isinstance(self.start, string_types):
3097 self.log.error("'start' must be a string")
3099 # Look for error handler
3100 def get_error_func(self):
3101 self.error_func = self.pdict.get("p_error")
3103 # Validate the error function
3104 def validate_error_func(self):
3105 if self.error_func:
3106 if isinstance(self.error_func, types.FunctionType):
3107 ismethod = 0
3108 elif isinstance(self.error_func, types.MethodType):
3109 ismethod = 1
3110 else:
3111 self.log.error("'p_error' defined, but is not a function or method")
3112 self.error = True
3113 return
3115 eline = self.error_func.__code__.co_firstlineno
3116 efile = self.error_func.__code__.co_filename
3117 module = inspect.getmodule(self.error_func)
3118 self.modules.add(module)
3120 argcount = self.error_func.__code__.co_argcount - ismethod
3121 if argcount != 1:
3122 self.log.error("%s:%d: p_error() requires 1 argument", efile, eline)
3123 self.error = True
3125 # Get the tokens map
3126 def get_tokens(self):
3127 tokens = self.pdict.get("tokens")
3128 if not tokens:
3129 self.log.error("No token list is defined")
3130 self.error = True
3131 return
3133 if not isinstance(tokens, (list, tuple)):
3134 self.log.error("tokens must be a list or tuple")
3135 self.error = True
3136 return
3138 if not tokens:
3139 self.log.error("tokens is empty")
3140 self.error = True
3141 return
3143 self.tokens = sorted(tokens)
3145 # Validate the tokens
3146 def validate_tokens(self):
3147 # Validate the tokens.
3148 if "error" in self.tokens:
3149 self.log.error("Illegal token name 'error'. Is a reserved word")
3150 self.error = True
3151 return
3153 terminals = set()
3154 for n in self.tokens:
3155 if n in terminals:
3156 self.log.warning("Token %r multiply defined", n)
3157 terminals.add(n)
3159 # Get the precedence map (if any)
3160 def get_precedence(self):
3161 self.prec = self.pdict.get("precedence")
3163 # Validate and parse the precedence map
3164 def validate_precedence(self):
3165 preclist = []
3166 if self.prec:
3167 if not isinstance(self.prec, (list, tuple)):
3168 self.log.error("precedence must be a list or tuple")
3169 self.error = True
3170 return
3171 for level, p in enumerate(self.prec):
3172 if not isinstance(p, (list, tuple)):
3173 self.log.error("Bad precedence table")
3174 self.error = True
3175 return
3177 if len(p) < 2:
3178 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)", p)
3179 self.error = True
3180 return
3181 assoc = p[0]
3182 if not isinstance(assoc, string_types):
3183 self.log.error("precedence associativity must be a string")
3184 self.error = True
3185 return
3186 for term in p[1:]:
3187 if not isinstance(term, string_types):
3188 self.log.error("precedence items must be strings")
3189 self.error = True
3190 return
3191 preclist.append((term, assoc, level + 1))
3192 self.preclist = preclist
3194 # Get all p_functions from the grammar
3195 def get_pfunctions(self):
3196 p_functions = []
3197 for name, item in self.pdict.items():
3198 if not name.startswith("p_") or name == "p_error":
3199 continue
3200 if isinstance(item, (types.FunctionType, types.MethodType)):
3201 line = getattr(item, "co_firstlineno", item.__code__.co_firstlineno)
3202 module = inspect.getmodule(item)
3203 p_functions.append((line, module, name, item.__doc__))
3205 # Sort all of the actions by line number; make sure to stringify
3206 # modules to make them sortable, since `line` may not uniquely sort all
3207 # p functions
3208 p_functions.sort(
3209 key=lambda p_function: (p_function[0], str(p_function[1]), p_function[2], p_function[3])
3210 )
3211 self.pfuncs = p_functions
3213 # Validate all of the p_functions
3214 def validate_pfunctions(self):
3215 grammar = []
3216 # Check for non-empty symbols
3217 if len(self.pfuncs) == 0:
3218 self.log.error("no rules of the form p_rulename are defined")
3219 self.error = True
3220 return
3222 for line, module, name, doc in self.pfuncs:
3223 file = inspect.getsourcefile(module)
3224 func = self.pdict[name]
3225 if isinstance(func, types.MethodType):
3226 reqargs = 2
3227 else:
3228 reqargs = 1
3229 if func.__code__.co_argcount > reqargs:
3230 self.log.error("%s:%d: Rule %r has too many arguments", file, line, func.__name__)
3231 self.error = True
3232 elif func.__code__.co_argcount < reqargs:
3233 self.log.error("%s:%d: Rule %r requires an argument", file, line, func.__name__)
3234 self.error = True
3235 elif not func.__doc__:
3236 self.log.warning(
3237 "%s:%d: No documentation string specified in function %r (ignored)",
3238 file,
3239 line,
3240 func.__name__,
3241 )
3242 else:
3243 try:
3244 parsed_g = parse_grammar(doc, file, line)
3245 for g in parsed_g:
3246 grammar.append((name, g))
3247 except SyntaxError as e:
3248 self.log.error(str(e))
3249 self.error = True
3251 # Looks like a valid grammar rule
3252 # Mark the file in which it was defined.
3253 self.modules.add(module)
3255 # Secondary validation step that looks for p_ definitions that are not functions
3256 # or functions that look like they might be grammar rules.
3258 for n, v in self.pdict.items():
3259 if n.startswith("p_") and isinstance(v, (types.FunctionType, types.MethodType)):
3260 continue
3261 if n.startswith("t_"):
3262 continue
3263 if n.startswith("p_") and n != "p_error":
3264 self.log.warning("%r not defined as a function", n)
3265 if (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or (
3266 isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2
3267 ):
3268 if v.__doc__:
3269 try:
3270 doc = v.__doc__.split(" ")
3271 if doc[1] == ":":
3272 self.log.warning(
3273 "%s:%d: Possible grammar rule %r defined without p_ prefix",
3274 v.__code__.co_filename,
3275 v.__code__.co_firstlineno,
3276 n,
3277 )
3278 except IndexError:
3279 pass
3281 self.grammar = grammar
3284# -----------------------------------------------------------------------------
3285# yacc(module)
3286#
3287# Build a parser
3288# -----------------------------------------------------------------------------
3291def yacc(
3292 method="LALR",
3293 debug=yaccdebug,
3294 module=None,
3295 tabmodule=tab_module,
3296 start=None,
3297 check_recursion=True,
3298 optimize=False,
3299 write_tables=True,
3300 debugfile=debug_file,
3301 outputdir=None,
3302 debuglog=None,
3303 errorlog=None,
3304 picklefile=None,
3305):
3307 if tabmodule is None:
3308 tabmodule = tab_module
3310 # Reference to the parsing method of the last built parser
3311 global parse
3313 # If pickling is enabled, table files are not created
3314 if picklefile:
3315 write_tables = False
3317 if errorlog is None:
3318 errorlog = PlyLogger(sys.stderr)
3320 # Get the module dictionary used for the parser
3321 if module:
3322 _items = [(k, getattr(module, k)) for k in dir(module)]
3323 pdict = dict(_items)
3324 # If no __file__ or __package__ attributes are available, try to obtain them
3325 # from the __module__ instead
3326 if "__file__" not in pdict:
3327 pdict["__file__"] = sys.modules[pdict["__module__"]].__file__
3328 if "__package__" not in pdict and "__module__" in pdict:
3329 if hasattr(sys.modules[pdict["__module__"]], "__package__"):
3330 pdict["__package__"] = sys.modules[pdict["__module__"]].__package__
3331 else:
3332 pdict = get_caller_module_dict(2)
3334 if outputdir is None:
3335 # If no output directory is set, the location of the output files
3336 # is determined according to the following rules:
3337 # - If tabmodule specifies a package, files go into that package directory
3338 # - Otherwise, files go in the same directory as the specifying module
3339 if isinstance(tabmodule, types.ModuleType):
3340 srcfile = tabmodule.__file__
3341 else:
3342 if "." not in tabmodule:
3343 srcfile = pdict["__file__"]
3344 else:
3345 parts = tabmodule.split(".")
3346 pkgname = ".".join(parts[:-1])
3347 exec("import %s" % pkgname)
3348 srcfile = getattr(sys.modules[pkgname], "__file__", "")
3349 outputdir = os.path.dirname(srcfile)
3351 # Determine if the module is part of a package or not.
3352 # If so, fix the tabmodule setting so that tables load correctly
3353 pkg = pdict.get("__package__")
3354 if pkg and isinstance(tabmodule, str):
3355 if "." not in tabmodule:
3356 tabmodule = pkg + "." + tabmodule
3358 # Set start symbol if it's specified directly using an argument
3359 if start is not None:
3360 pdict["start"] = start
3362 # Collect parser information from the dictionary
3363 pinfo = ParserReflect(pdict, log=errorlog)
3364 pinfo.get_all()
3366 if pinfo.error:
3367 raise YaccError("Unable to build parser")
3369 # Check signature against table files (if any)
3370 signature = pinfo.signature()
3372 # Read the tables
3373 try:
3374 lr = LRTable()
3375 if picklefile:
3376 read_signature = lr.read_pickle(picklefile)
3377 else:
3378 read_signature = lr.read_table(tabmodule)
3379 if optimize or (read_signature == signature):
3380 try:
3381 lr.bind_callables(pinfo.pdict)
3382 parser = LRParser(lr, pinfo.error_func)
3383 parse = parser.parse
3384 return parser
3385 except Exception as e:
3386 errorlog.warning("There was a problem loading the table file: %r", e)
3387 except VersionError as e:
3388 errorlog.warning(str(e))
3389 except ImportError:
3390 pass
3392 if debuglog is None:
3393 if debug:
3394 try:
3395 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), "w"))
3396 except IOError as e:
3397 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3398 debuglog = NullLogger()
3399 else:
3400 debuglog = NullLogger()
3402 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3404 errors = False
3406 # Validate the parser information
3407 if pinfo.validate_all():
3408 raise YaccError("Unable to build parser")
3410 if not pinfo.error_func:
3411 errorlog.warning("no p_error() function is defined")
3413 # Create a grammar object
3414 grammar = Grammar(pinfo.tokens)
3416 # Set precedence level for terminals
3417 for term, assoc, level in pinfo.preclist:
3418 try:
3419 grammar.set_precedence(term, assoc, level)
3420 except GrammarError as e:
3421 errorlog.warning("%s", e)
3423 # Add productions to the grammar
3424 for funcname, gram in pinfo.grammar:
3425 file, line, prodname, syms = gram
3426 try:
3427 grammar.add_production(prodname, syms, funcname, file, line)
3428 except GrammarError as e:
3429 errorlog.error("%s", e)
3430 errors = True
3432 # Set the grammar start symbols
3433 try:
3434 if start is None:
3435 grammar.set_start(pinfo.start)
3436 else:
3437 grammar.set_start(start)
3438 except GrammarError as e:
3439 errorlog.error(str(e))
3440 errors = True
3442 if errors:
3443 raise YaccError("Unable to build parser")
3445 # Verify the grammar structure
3446 undefined_symbols = grammar.undefined_symbols()
3447 for sym, prod in undefined_symbols:
3448 errorlog.error(
3449 "%s:%d: Symbol %r used, but not defined as a token or a rule", prod.file, prod.line, sym
3450 )
3451 errors = True
3453 unused_terminals = grammar.unused_terminals()
3454 if unused_terminals:
3455 debuglog.info("")
3456 debuglog.info("Unused terminals:")
3457 debuglog.info("")
3458 for term in unused_terminals:
3459 errorlog.warning("Token %r defined, but not used", term)
3460 debuglog.info(" %s", term)
3462 # Print out all productions to the debug log
3463 if debug:
3464 debuglog.info("")
3465 debuglog.info("Grammar")
3466 debuglog.info("")
3467 for n, p in enumerate(grammar.Productions):
3468 debuglog.info("Rule %-5d %s", n, p)
3470 # Find unused non-terminals
3471 unused_rules = grammar.unused_rules()
3472 for prod in unused_rules:
3473 errorlog.warning("%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name)
3475 if len(unused_terminals) == 1:
3476 errorlog.warning("There is 1 unused token")
3477 if len(unused_terminals) > 1:
3478 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3480 if len(unused_rules) == 1:
3481 errorlog.warning("There is 1 unused rule")
3482 if len(unused_rules) > 1:
3483 errorlog.warning("There are %d unused rules", len(unused_rules))
3485 if debug:
3486 debuglog.info("")
3487 debuglog.info("Terminals, with rules where they appear")
3488 debuglog.info("")
3489 terms = list(grammar.Terminals)
3490 terms.sort()
3491 for term in terms:
3492 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3494 debuglog.info("")
3495 debuglog.info("Nonterminals, with rules where they appear")
3496 debuglog.info("")
3497 nonterms = list(grammar.Nonterminals)
3498 nonterms.sort()
3499 for nonterm in nonterms:
3500 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3501 debuglog.info("")
3503 if check_recursion:
3504 unreachable = grammar.find_unreachable()
3505 for u in unreachable:
3506 errorlog.warning("Symbol %r is unreachable", u)
3508 infinite = grammar.infinite_cycles()
3509 for inf in infinite:
3510 errorlog.error("Infinite recursion detected for symbol %r", inf)
3511 errors = True
3513 unused_prec = grammar.unused_precedence()
3514 for term, assoc in unused_prec:
3515 errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term)
3516 errors = True
3518 if errors:
3519 raise YaccError("Unable to build parser")
3521 # Run the LRGeneratedTable on the grammar
3522 if debug:
3523 errorlog.debug("Generating %s tables", method)
3525 lr = LRGeneratedTable(grammar, method, debuglog)
3527 if debug:
3528 num_sr = len(lr.sr_conflicts)
3530 # Report shift/reduce and reduce/reduce conflicts
3531 if num_sr == 1:
3532 errorlog.warning("1 shift/reduce conflict")
3533 elif num_sr > 1:
3534 errorlog.warning("%d shift/reduce conflicts", num_sr)
3536 num_rr = len(lr.rr_conflicts)
3537 if num_rr == 1:
3538 errorlog.warning("1 reduce/reduce conflict")
3539 elif num_rr > 1:
3540 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3542 # Write out conflicts to the output file
3543 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3544 debuglog.warning("")
3545 debuglog.warning("Conflicts:")
3546 debuglog.warning("")
3548 for state, tok, resolution in lr.sr_conflicts:
3549 debuglog.warning(
3550 "shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution
3551 )
3553 already_reported = set()
3554 for state, rule, rejected in lr.rr_conflicts:
3555 if (state, id(rule), id(rejected)) in already_reported:
3556 continue
3557 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3558 debuglog.warning("rejected rule (%s) in state %d", rejected, state)
3559 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3560 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3561 already_reported.add((state, id(rule), id(rejected)))
3563 warned_never = []
3564 for state, rule, rejected in lr.rr_conflicts:
3565 if not rejected.reduced and (rejected not in warned_never):
3566 debuglog.warning("Rule (%s) is never reduced", rejected)
3567 errorlog.warning("Rule (%s) is never reduced", rejected)
3568 warned_never.append(rejected)
3570 # Write the table file if requested
3571 if write_tables:
3572 try:
3573 lr.write_table(tabmodule, outputdir, signature)
3574 if tabmodule in sys.modules:
3575 del sys.modules[tabmodule]
3576 except IOError as e:
3577 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3579 # Write a pickled version of the tables
3580 if picklefile:
3581 try:
3582 lr.pickle_table(picklefile, signature)
3583 except IOError as e:
3584 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3586 # Build the parser
3587 lr.bind_callables(pinfo.pdict)
3588 parser = LRParser(lr, pinfo.error_func)
3590 parse = parser.parse
3591 return parser
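# Typical usage (a minimal sketch; the lexer module 'calclex' and its token
# list are assumed to exist, as in the PLY documentation):
#
#     import ply.yacc as yacc
#     from calclex import tokens
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]
#
#     parser = yacc.yacc()          # builds or loads the parsing tables
#     result = parser.parse(data)   # uses the most recent ply.lex lexer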