Coverage for python/lsst/daf/butler/registry/queries/expressions/parser/ply/yacc.py: 5%
1929 statements
coverage.py v7.2.5, created at 2023-05-02 18:18 -0700

# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import inspect
import os.path
import re
import sys
import types
import warnings

__version__ = "3.11"
__tabversion__ = "3.10"

# -----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
# -----------------------------------------------------------------------------

yaccdebug = True  # Debugging mode. If set, yacc generates a
                  # 'parser.out' file in the current directory

debug_file = "parser.out"  # Default name of the debugging file
tab_module = "parsetab"    # Default name of the table module
default_lr = "LALR"        # Default LR table generation method

error_count = 3  # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False  # Set to True if developing yacc. This turns off optimized
                   # implementations of certain functions.

resultlimit = 40  # Size limit of results when running in debug mode.

pickle_protocol = 0  # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + "\n")

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug
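
# A minimal usage sketch (the grammar module itself is assumed): the debuglog
# and errorlog arguments of yacc() accept any object with this logger
# interface, so table-construction output can be redirected explicitly.
#
#     log = PlyLogger(sys.stderr)
#     parser = yacc(debug=True, debuglog=log, errorlog=log)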

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if "\n" in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + " ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str)
    return result


# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if "\n" in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))

# Panic mode error recovery support. This feature is being reworked; much of the
# code here is to offer a deprecation/backwards-compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
"""

def errok():
    warnings.warn(_warnmsg)
    return _errok()


def restart():
    warnings.warn(_warnmsg)
    return _restart()


def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r

# -----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
# -----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#     .type      = Grammar symbol type
#     .value     = Symbol value
#     .lineno    = Starting line number
#     .endlineno = Ending line number (optional, set automatically)
#     .lexpos    = Starting lex position
#     .endlexpos = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], "lexpos", 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        startpos = getattr(self.slice[n], "lexpos", 0)
        endpos = getattr(self.slice[n], "endlexpos", startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
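
# A short sketch of how a grammar rule function sees this wrapper (the rule
# and token names below are illustrative, not defined in this module):
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]           # reads/writes .value of each YaccSymbol
#         first, last = p.linespan(3)  # line range covered by 'term'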

# -----------------------------------------------------------------------------
# == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = "$end"
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
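
    # A usage sketch (hypothetical scenario): if a custom tokenfunc or lexer
    # inspects parser state between tokens, a default reduction can fire
    # before the next token has been read, so such callers may turn the
    # feature off first:
    #
    #     parser.disable_defaulted_states()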

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
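
    # Dispatch sketch (parser/lexer construction elided; the input string is
    # illustrative): the same input runs through whichever engine variant
    # matches the flags, with plain calls taking the fastest path.
    #
    #     parser.parse("a + b", lexer=mylexer)                 # parseopt_notrack
    #     parser.parse("a + b", lexer=mylexer, tracking=True)  # parseopt
    #     parser.parse("a + b", lexer=mylexer, debug=True)     # parsedebug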

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse(). All changes made to the
    # parsing engine should be made here. Optimized versions of this function
    # are automatically created by the ply/ygen.py script. This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        # --! parsedebug-start
        lookahead = None  # Current lookahead symbol
        lookaheadstack = []  # Stack of lookahead symbols
        actions = self.action  # Local reference to action table (to avoid lookup on self.)
        goto = self.goto  # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)  # Production object passed to grammar rules
        errorcount = 0  # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex

            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []  # Stack of parsing states
        self.statestack = statestack
        symstack = []  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack  # Put in the production
        errtoken = None  # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug("")
            debug.debug("State : %s", state)
            # --! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()  # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = "$end"

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                # --! DEBUG
                debug.debug("Defaulted state %s: Reduce using %d", state, -t)
                # --! DEBUG

            # --! DEBUG
            debug.debug(
                "Stack : %s",
                ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(),
            )
            # --! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname  # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info(
                            "Action : Reduce rule [%s] with %s and goto state %d",
                            p.str,
                            "[" + ",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]]) + "]",
                            goto[statestack[-1 - plen]][pname],
                        )
                    else:
                        debug.info(
                            "Action : Reduce rule [%s] with %s and goto state %d",
                            p.str,
                            [],
                            goto[statestack[-1]][pname],
                        )
                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen - 1 :]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            symstack.extend(targ[1:-1])  # Put the production slice back on the stack
                            statestack.pop()  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:
                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            statestack.pop()  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                n = symstack[-1]
                result = getattr(n, "value", None)
                # --! DEBUG
                debug.info("Done : Returning %s", format_result(result))
                debug.info("PLY: PARSE DEBUG END")
                # --! DEBUG
                return result

            if t is None:
                # --! DEBUG
                debug.error(
                    "Error : %s",
                    ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip(),
                )
                # --! DEBUG

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None  # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, "lexer"):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write(
                                    "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)
                                )
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != "error":
                    sym = symstack[-1]
                    if sym.type == "error":
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        # --! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, "lineno", sym.lineno)
                            sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos)
                        # --! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = "error"

                    if hasattr(lookahead, "lineno"):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, "lexpos"):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    # --! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    # --! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
        # --! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        # --! parseopt-start
        lookahead = None  # Current lookahead symbol
        lookaheadstack = []  # Stack of lookahead symbols
        actions = self.action  # Local reference to action table (to avoid lookup on self.)
        goto = self.goto  # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)  # Production object passed to grammar rules
        errorcount = 0  # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex

            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []  # Stack of parsing states
        self.statestack = statestack
        symstack = []  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack  # Put in the production
        errtoken = None  # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()  # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = "$end"

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname  # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen - 1 :]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, "endlineno", t1.lineno)
                            sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos)
                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            symstack.extend(targ[1:-1])  # Put the production slice back on the stack
                            statestack.pop()  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:
                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            statestack.pop()  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                n = symstack[-1]
                result = getattr(n, "value", None)
                return result

            if t is None:
                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None  # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, "lexer"):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write(
                                    "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)
                                )
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != "error":
                    sym = symstack[-1]
                    if sym.type == "error":
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        # --! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, "lineno", sym.lineno)
                            sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos)
                        # --! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = "error"

                    if hasattr(lookahead, "lineno"):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, "lexpos"):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    # --! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    # --! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
        # --! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        # --! parseopt-notrack-start
        lookahead = None  # Current lookahead symbol
        lookaheadstack = []  # Stack of lookahead symbols
        actions = self.action  # Local reference to action table (to avoid lookup on self.)
        goto = self.goto  # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)  # Production object passed to grammar rules
        errorcount = 0  # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex

            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []  # Stack of parsing states
        self.statestack = statestack
        symstack = []  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack  # Put in the production
        errtoken = None  # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()  # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = "$end"

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname  # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen - 1 :]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            symstack.extend(targ[1:-1])  # Put the production slice back on the stack
                            statestack.pop()  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:
                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)  # Save the current lookahead token
                            statestack.pop()  # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = "error"
                            sym.value = "error"
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            if t == 0:
                n = symstack[-1]
                result = getattr(n, "value", None)
                return result

            if t is None:
                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None  # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, "lexer"):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, "lineno"):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write(
                                    "yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)
                                )
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != "error":
                    sym = symstack[-1]
                    if sym.type == "error":
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = "error"

                    if hasattr(lookahead, "lineno"):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, "lexpos"):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
        # --! parseopt-notrack-end


# -----------------------------------------------------------------------------
# === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r"^[a-zA-Z0-9_-]+$")

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0

    def __init__(self, number, name, prod, precedence=("right", 0), func=None, file="", line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)  # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = self.Prodnames[p.prod[n + 1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n - 1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal stand-in for Production objects when
# reading table data from files. It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here
# are the basic attributes:
#
#       name       - Name of the production. For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item. Example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, ".")
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem(" + str(self) + ")"

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
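
# Worked example (symbol names are illustrative): for
# symbols = ['expr', 'PLUS', 'term'] where only 'PLUS' is in terminals,
# the scan starts from the right, skips the nonterminal 'term', and
# returns 'PLUS'.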

# -----------------------------------------------------------------------------
# === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------


class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]  # A list of all of the productions. The first
                                   # entry is always reserved for the purpose of
                                   # building an augmented grammar

        self.Prodnames = {}  # A dictionary mapping the names of nonterminals to a list of all
                             # productions of that nonterminal.

        self.Prodmap = {}  # A dictionary that is only used to detect duplicate
                           # productions.

        self.Terminals = {}  # A dictionary mapping the names of terminal symbols to a
                             # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals["error"] = []

        self.Nonterminals = {}  # A dictionary mapping names of nonterminals to a list
                                # of rule numbers where they are used.

        self.First = {}  # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}  # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}  # Precedence rules for each terminal. Contains tuples of the
                              # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set()  # Precedence rules that were actually used by the grammar.
                                     # This is only used to provide error checking and to generate
                                     # a warning about unused precedence rules.

        self.Start = None  # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'. level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], "Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal %r" % term)
        if assoc not in ["left", "right", "nonassoc"]:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
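
    # Usage sketch (g is a hypothetical Grammar instance; terminal names are
    # illustrative). This mirrors a yacc-style precedence table such as
    # (('left', 'PLUS', 'MINUS'), ('left', 'TIMES')):
    #
    #     g.set_precedence('PLUS', 'left', 1)
    #     g.set_precedence('MINUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)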

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the rightmost terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file="", line=0):
        if prodname in self.Terminals:
            raise GrammarError(
                "%s:%d: Illegal rule name %r. Already defined as a token" % (file, line, prodname)
            )
        if prodname == "error":
            raise GrammarError(
                "%s:%d: Illegal rule name %r. error is a reserved word" % (file, line, prodname)
            )
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name %r" % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if len(c) > 1:
                        raise GrammarError(
                            "%s:%d: Literal token %s in rule %r may only be a single character"
                            % (file, line, s, prodname)
                        )
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != "%prec":
                raise GrammarError("%s:%d: Illegal name %r in rule %r" % (file, line, s, prodname))

        # Determine the precedence level
        if "%prec" in syms:
            if syms[-1] == "%prec":
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file, line))
            if syms[-2] != "%prec":
                raise GrammarError(
                    "%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file, line)
                )
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of %r" % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]  # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ("right", 0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError(
                "%s:%d: Duplicate rule %s. " % (file, line, m)
                + "Previous definition at %s:%d" % (m.file, m.line)
            )

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
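
    # Usage sketch (g, symbol names, and file/line values are illustrative;
    # 'UMINUS' must already have been registered via set_precedence). The
    # %prec marker gives the rule the precedence of UMINUS instead of the
    # precedence of its rightmost terminal, and func is the *name* of the
    # action function, bound later through Production.bind():
    #
    #     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'],
    #                      func='p_expr_uminus', file='calc.py', line=42)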

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol. Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):
        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates["$end"] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for n, pl in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for s, term in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != "error":
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
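
    # Worked example (hypothetical grammar): given only the production
    #
    #     a : a B
    #
    # the nonterminal 'a' can never derive a string of only terminals (every
    # derivation reintroduces 'a'), so terminates['a'] stays False and 'a'
    # appears in the returned infinite list.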

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != "error":
                    result.append((s, p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar. Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != "error" and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod
1808 # -----------------------------------------------------------------------------
1809 # unused_precedence()
1810 #
1811 # Returns a list of tuples (term,precedence) corresponding to precedence
1812 # rules that were never used by the grammar. term is the name of the terminal
1813 # on which precedence was applied and precedence is a string such as 'left' or
1814 # 'right' corresponding to the type of precedence.
1815 # -----------------------------------------------------------------------------
1817 def unused_precedence(self):
1818 unused = []
1819 for termname in self.Precedence:
1820 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1821 unused.append((termname, self.Precedence[termname][0]))
1823 return unused
1825 # -------------------------------------------------------------------------
1826 # _first()
1827 #
1828 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1829 #
1830 # During execution of compute_first(), the result may be incomplete.
1831 # Afterward (e.g., when called from compute_follow()), it will be complete.
1832 # -------------------------------------------------------------------------
1833 def _first(self, beta):
1834 # We are computing First(x1,x2,x3,...,xn)
1835 result = []
1836 for x in beta:
1837 x_produces_empty = False
1839 # Add all the non-<empty> symbols of First[x] to the result.
1840 for f in self.First[x]:
1841 if f == "<empty>":
1842 x_produces_empty = True
1843 else:
1844 if f not in result:
1845 result.append(f)
1847 if x_produces_empty:
1848 # We have to consider the next x in beta,
1849 # i.e. stay in the loop.
1850 pass
1851 else:
1852 # We don't have to consider any further symbols in beta.
1853 break
1854 else:
1855 # There was no 'break' from the loop,
1856 # so x_produces_empty was true for all x in beta,
1857 # so beta produces empty as well.
1858 result.append("<empty>")
1860 return result
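# Worked example (illustrative; the rule names are hypothetical): for
#
#     E : T Eprime
#     Eprime : PLUS T Eprime
#            |
#     T : NUMBER
#
# First[T] = ['NUMBER'] and First[Eprime] = ['PLUS', '<empty>'], so
# _first(('Eprime', 'NUMBER')) returns ['PLUS', 'NUMBER']: Eprime can
# vanish, letting the NUMBER behind it contribute to the result.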
1862 # -------------------------------------------------------------------------
1863 # compute_first()
1864 #
1865 # Compute the value of FIRST1(X) for all symbols
1866 # -------------------------------------------------------------------------
1867 def compute_first(self):
1868 if self.First:
1869 return self.First
1871 # Terminals:
1872 for t in self.Terminals:
1873 self.First[t] = [t]
1875 self.First["$end"] = ["$end"]
1877 # Nonterminals:
1879 # Initialize to the empty set:
1880 for n in self.Nonterminals:
1881 self.First[n] = []
1883 # Then propagate symbols until no change:
1884 while True:
1885 some_change = False
1886 for n in self.Nonterminals:
1887 for p in self.Prodnames[n]:
1888 for f in self._first(p.prod):
1889 if f not in self.First[n]:
1890 self.First[n].append(f)
1891 some_change = True
1892 if not some_change:
1893 break
1895 return self.First
1897 # ---------------------------------------------------------------------
1898 # compute_follow()
1899 #
1900 # Computes all of the follow sets for every non-terminal symbol. The
1901 # follow set is the set of all symbols that might follow a given
1902 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1903 # ---------------------------------------------------------------------
1904 def compute_follow(self, start=None):
1905 # If already computed, return the result
1906 if self.Follow:
1907 return self.Follow
1909 # If first sets not computed yet, do that first.
1910 if not self.First:
1911 self.compute_first()
1913 # Add '$end' to the follow list of the start symbol
1914 for k in self.Nonterminals:
1915 self.Follow[k] = []
1917 if not start:
1918 start = self.Productions[1].name
1920 self.Follow[start] = ["$end"]
1922 while True:
1923 didadd = False
1924 for p in self.Productions[1:]:
1925 # Here is the production set
1926 for i, B in enumerate(p.prod):
1927 if B in self.Nonterminals:
1928 # Okay. We got a non-terminal in a production
1929 fst = self._first(p.prod[i + 1 :])
1930 hasempty = False
1931 for f in fst:
1932 if f != "<empty>" and f not in self.Follow[B]:
1933 self.Follow[B].append(f)
1934 didadd = True
1935 if f == "<empty>":
1936 hasempty = True
1937 if hasempty or i == (len(p.prod) - 1):
1938 # Add elements of follow(a) to follow(b)
1939 for f in self.Follow[p.name]:
1940 if f not in self.Follow[B]:
1941 self.Follow[B].append(f)
1942 didadd = True
1943 if not didadd:
1944 break
1945 return self.Follow
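# Worked example (illustrative), continuing the grammar from the _first()
# example above, with start symbol E:
#
#     Follow[E]      = ['$end']
#     Follow[Eprime] = ['$end']           (Eprime only ends rules)
#     Follow[T]      = ['PLUS', '$end']   (T is followed by Eprime, which
#                                          starts with PLUS or vanishes)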
1947 # -----------------------------------------------------------------------------
1948 # build_lritems()
1949 #
1950 # This function walks the list of productions and builds a complete set of the
1951 # LR items. The LR items are stored in two ways: First, they are uniquely
1952 # numbered and placed in the list _lritems. Second, a linked list of LR items
1953 # is built for each production. For example:
1954 #
1955 # E -> E PLUS E
1956 #
1957 # Creates the list
1958 #
1959 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1960 # -----------------------------------------------------------------------------
1962 def build_lritems(self):
1963 for p in self.Productions:
1964 lastlri = p
1965 i = 0
1966 lr_items = []
1967 while True:
1968 if i > len(p):
1969 lri = None
1970 else:
1971 lri = LRItem(p, i)
1972 # Precompute the list of productions immediately following
1973 try:
1974 lri.lr_after = self.Prodnames[lri.prod[i + 1]]
1975 except (IndexError, KeyError):
1976 lri.lr_after = []
1977 try:
1978 lri.lr_before = lri.prod[i - 1]
1979 except IndexError:
1980 lri.lr_before = None
1982 lastlri.lr_next = lri
1983 if not lri:
1984 break
1985 lr_items.append(lri)
1986 lastlri = lri
1987 i += 1
1988 p.lr_items = lr_items
1991# -----------------------------------------------------------------------------
1992# == Class LRTable ==
1993#
1994 # This class represents a basic table of LR parsing information.
1995# Methods for generating the tables are not defined here. They are defined
1996# in the derived class LRGeneratedTable.
1997# -----------------------------------------------------------------------------
2000class VersionError(YaccError):
2001 pass
2004class LRTable(object):
2005 def __init__(self):
2006 self.lr_action = None
2007 self.lr_goto = None
2008 self.lr_productions = None
2009 self.lr_method = None
2011 def read_table(self, module):
2012 if isinstance(module, types.ModuleType):
2013 parsetab = module
2014 else:
2015 exec("import %s" % module)
2016 parsetab = sys.modules[module]
2018 if parsetab._tabversion != __tabversion__:
2019 raise VersionError("yacc table file version is out of date")
2021 self.lr_action = parsetab._lr_action
2022 self.lr_goto = parsetab._lr_goto
2024 self.lr_productions = []
2025 for p in parsetab._lr_productions:
2026 self.lr_productions.append(MiniProduction(*p))
2028 self.lr_method = parsetab._lr_method
2029 return parsetab._lr_signature
2031 def read_pickle(self, filename):
2032 try:
2033 import cPickle as pickle
2034 except ImportError:
2035 import pickle
2037 if not os.path.exists(filename):
2038 raise ImportError
2040 in_f = open(filename, "rb")
2042 tabversion = pickle.load(in_f)
2043 if tabversion != __tabversion__:
2044 raise VersionError("yacc table file version is out of date")
2045 self.lr_method = pickle.load(in_f)
2046 signature = pickle.load(in_f)
2047 self.lr_action = pickle.load(in_f)
2048 self.lr_goto = pickle.load(in_f)
2049 productions = pickle.load(in_f)
2051 self.lr_productions = []
2052 for p in productions:
2053 self.lr_productions.append(MiniProduction(*p))
2055 in_f.close()
2056 return signature
2058 # Bind all production function names to callable objects in pdict
2059 def bind_callables(self, pdict):
2060 for p in self.lr_productions:
2061 p.bind(pdict)
2064# -----------------------------------------------------------------------------
2065# === LR Generator ===
2066#
2067# The following classes and functions are used to generate LR parsing tables on
2068# a grammar.
2069# -----------------------------------------------------------------------------
2071# -----------------------------------------------------------------------------
2072# digraph()
2073# traverse()
2074#
2075 # The following two functions are used to compute set-valued functions
2076# of the form:
2077#
2078# F(x) = F'(x) U U{F(y) | x R y}
2079#
2080# This is used to compute the values of Read() sets as well as FOLLOW sets
2081# in LALR(1) generation.
2082#
2083# Inputs: X - An input set
2084# R - A relation
2085# FP - Set-valued function
2086# ------------------------------------------------------------------------------
2089def digraph(X, R, FP):
2090 N = {}
2091 for x in X:
2092 N[x] = 0
2093 stack = []
2094 F = {}
2095 for x in X:
2096 if N[x] == 0:
2097 traverse(x, N, stack, F, X, R, FP)
2098 return F
2101def traverse(x, N, stack, F, X, R, FP):
2102 stack.append(x)
2103 d = len(stack)
2104 N[x] = d
2105 F[x] = FP(x) # F(X) <- F'(x)
2107 rel = R(x) # Get y's related to x
2108 for y in rel:
2109 if N[y] == 0:
2110 traverse(y, N, stack, F, X, R, FP)
2111 N[x] = min(N[x], N[y])
2112 for a in F.get(y, []):
2113 if a not in F[x]:
2114 F[x].append(a)
2115 if N[x] == d:
2116 N[stack[-1]] = MAXINT
2117 F[stack[-1]] = F[x]
2118 element = stack.pop()
2119 while element != x:
2120 N[stack[-1]] = MAXINT
2121 F[stack[-1]] = F[x]
2122 element = stack.pop()
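# Minimal sketch of digraph() in action (hypothetical inputs, not taken
# from the module): compute F(x) = FP(x) unioned with F(y) for every y
# reachable through R.
#
#     F = digraph([1, 2],
#                 R=lambda x: [2] if x == 1 else [],
#                 FP=lambda x: ['a'] if x == 1 else ['b'])
#     # F == {1: ['a', 'b'], 2: ['b']}
#
# Node 2 contributes only its own set ['b']; node 1 starts from ['a'] and
# absorbs F(2) because 1 R 2.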
2125class LALRError(YaccError):
2126 pass
2129# -----------------------------------------------------------------------------
2130# == LRGeneratedTable ==
2131#
2132 # This class implements the LR table generation algorithm. There are no
2133 # public methods except for the table-writing methods write_table() and pickle_table()
2134# -----------------------------------------------------------------------------
2137class LRGeneratedTable(LRTable):
2138 def __init__(self, grammar, method="LALR", log=None):
2139 if method not in ["SLR", "LALR"]:
2140 raise LALRError("Unsupported method %s" % method)
2142 self.grammar = grammar
2143 self.lr_method = method
2145 # Set up the logger
2146 if not log:
2147 log = NullLogger()
2148 self.log = log
2150 # Internal attributes
2151 self.lr_action = {} # Action table
2152 self.lr_goto = {} # Goto table
2153 self.lr_productions = grammar.Productions # Copy of grammar Production array
2154 self.lr_goto_cache = {} # Cache of computed gotos
2155 self.lr0_cidhash = {} # Cache of closures
2157 self._add_count = 0 # Internal counter used to detect cycles
2159 # Diagnostic information filled in by the table generator
2160 self.sr_conflict = 0
2161 self.rr_conflict = 0
2162 self.conflicts = [] # List of conflicts
2164 self.sr_conflicts = []
2165 self.rr_conflicts = []
2167 # Build the tables
2168 self.grammar.build_lritems()
2169 self.grammar.compute_first()
2170 self.grammar.compute_follow()
2171 self.lr_parse_table()
2173 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2175 def lr0_closure(self, I):
2176 self._add_count += 1
2178 # Add everything in I to J
2179 J = I[:]
2180 didadd = True
2181 while didadd:
2182 didadd = False
2183 for j in J:
2184 for x in j.lr_after:
2185 if getattr(x, "lr0_added", 0) == self._add_count:
2186 continue
2187 # Add B --> .G to J
2188 J.append(x.lr_next)
2189 x.lr0_added = self._add_count
2190 didadd = True
2192 return J
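# Illustration (not part of the module): for a grammar whose only rule is
# S : a, the augmented start production is S' -> S, and closing over the
# initial item pulls in every item with the dot before a reachable
# nonterminal:
#
#     lr0_closure([S' -> . S]) ==> [S' -> . S, S -> . a]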
2194 # Compute the LR(0) goto function goto(I,X) where I is a set
2195 # of LR(0) items and X is a grammar symbol. This function is written
2196 # in a way that guarantees uniqueness of the generated goto sets
2197 # (i.e. the same goto set will never be returned as two different Python
2198 # objects). With uniqueness, we can later do fast set comparisons using
2199 # id(obj) instead of element-wise comparison.
2201 def lr0_goto(self, I, x):
2202 # First we look for a previously cached entry
2203 g = self.lr_goto_cache.get((id(I), x))
2204 if g:
2205 return g
2207 # Now we generate the goto set in a way that guarantees uniqueness
2208 # of the result
2210 s = self.lr_goto_cache.get(x)
2211 if not s:
2212 s = {}
2213 self.lr_goto_cache[x] = s
2215 gs = []
2216 for p in I:
2217 n = p.lr_next
2218 if n and n.lr_before == x:
2219 s1 = s.get(id(n))
2220 if not s1:
2221 s1 = {}
2222 s[id(n)] = s1
2223 gs.append(n)
2224 s = s1
2225 g = s.get("$end")
2226 if not g:
2227 if gs:
2228 g = self.lr0_closure(gs)
2229 s["$end"] = g
2230 else:
2231 s["$end"] = gs
2232 self.lr_goto_cache[(id(I), x)] = g
2233 return g
2235 # Compute the LR(0) sets-of-items function (the canonical collection C)
2236 def lr0_items(self):
2237 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2238 i = 0
2239 for I in C:
2240 self.lr0_cidhash[id(I)] = i
2241 i += 1
2243 # Loop over the items in C and each grammar symbol
2244 i = 0
2245 while i < len(C):
2246 I = C[i]
2247 i += 1
2249 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2250 asyms = {}
2251 for ii in I:
2252 for s in ii.usyms:
2253 asyms[s] = None
2255 for x in asyms:
2256 g = self.lr0_goto(I, x)
2257 if not g or id(g) in self.lr0_cidhash:
2258 continue
2259 self.lr0_cidhash[id(g)] = len(C)
2260 C.append(g)
2262 return C
2264 # -----------------------------------------------------------------------------
2265 # ==== LALR(1) Parsing ====
2266 #
2267 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2268 # relying upon Follow() sets when performing reductions, a more selective
2269 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2270 # Thus, we mainly just have to focus on calculating the lookahead sets.
2271 #
2272 # The method used here is due to DeRemer and Pennello (1982).
2273 #
2274 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2275 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2276 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2277 #
2278 # Further details can also be found in:
2279 #
2280 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2281 # McGraw-Hill Book Company, (1985).
2282 #
2283 # -----------------------------------------------------------------------------
2285 # -----------------------------------------------------------------------------
2286 # compute_nullable_nonterminals()
2287 #
2288 # Creates a set containing all of the non-terminals that might produce
2289 # an empty production.
2290 # -----------------------------------------------------------------------------
2292 def compute_nullable_nonterminals(self):
2293 nullable = set()
2294 num_nullable = 0
2295 while True:
2296 for p in self.grammar.Productions[1:]:
2297 if p.len == 0:
2298 nullable.add(p.name)
2299 continue
2300 for t in p.prod:
2301 if t not in nullable:
2302 break
2303 else:
2304 nullable.add(p.name)
2305 if len(nullable) == num_nullable:
2306 break
2307 num_nullable = len(nullable)
2308 return nullable
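# Illustration (not part of the module): with the rules
#
#     a :                (empty production)
#     b : a a
#     c : a X
#
# the first pass marks 'a' (empty right-hand side) and then 'b' (all of
# its symbols are nullable); 'c' is never marked because the terminal X
# cannot vanish. The result is {'a', 'b'}.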
2310 # -----------------------------------------------------------------------------
2311 # find_nonterminal_trans(C)
2312 #
2313 # Given a set of LR(0) items, this function finds all of the non-terminal
2314 # transitions. These are transitions in which a dot appears immediately before
2315 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2316 # is the state number and N is the nonterminal symbol.
2317 #
2318 # The input C is the set of LR(0) items.
2319 # -----------------------------------------------------------------------------
2321 def find_nonterminal_transitions(self, C):
2322 trans = []
2323 for stateno, state in enumerate(C):
2324 for p in state:
2325 if p.lr_index < p.len - 1:
2326 t = (stateno, p.prod[p.lr_index + 1])
2327 if t[1] in self.grammar.Nonterminals:
2328 if t not in trans:
2329 trans.append(t)
2330 return trans
2332 # -----------------------------------------------------------------------------
2333 # dr_relation()
2334 #
2335 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2336 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2337 #
2338 # Returns a list of terminals.
2339 # -----------------------------------------------------------------------------
2341 def dr_relation(self, C, trans, nullable):
2342 state, N = trans
2343 terms = []
2345 g = self.lr0_goto(C[state], N)
2346 for p in g:
2347 if p.lr_index < p.len - 1:
2348 a = p.prod[p.lr_index + 1]
2349 if a in self.grammar.Terminals:
2350 if a not in terms:
2351 terms.append(a)
2353 # This extra bit is to handle the start state
2354 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2355 terms.append("$end")
2357 return terms
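# Illustration (not part of the module): for the grammar
#
#     S : A a
#     A : b
#
# goto(I0, 'A') contains the item S -> A . a, so the terminal 'a' directly
# follows the nonterminal transition (0, 'A') and dr_relation() returns
# ['a']. For the start transition (0, 'S'), the special case above adds
# '$end'.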
2359 # -----------------------------------------------------------------------------
2360 # reads_relation()
2361 #
2362 # Computes the READS() relation (p,A) READS (t,C).
2363 # -----------------------------------------------------------------------------
2365 def reads_relation(self, C, trans, empty):
2366 # Look for empty transitions
2367 rel = []
2368 state, N = trans
2370 g = self.lr0_goto(C[state], N)
2371 j = self.lr0_cidhash.get(id(g), -1)
2372 for p in g:
2373 if p.lr_index < p.len - 1:
2374 a = p.prod[p.lr_index + 1]
2375 if a in empty:
2376 rel.append((j, a))
2378 return rel
2380 # -----------------------------------------------------------------------------
2381 # compute_lookback_includes()
2382 #
2383 # Determines the lookback and includes relations
2384 #
2385 # LOOKBACK:
2386 #
2387 # This relation is determined by running the LR(0) state machine forward.
2388 # For example, starting with a production "N : . A B C", we run it forward
2389 # to obtain "N : A B C ." We then build a relationship between this final
2390 # state and the starting state. These relationships are stored in a dictionary
2391 # lookdict.
2392 #
2393 # INCLUDES:
2394 #
2395 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2396 #
2397 # This relation is used to determine non-terminal transitions that occur
2398 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2399 # if the following holds:
2400 #
2401 # B -> LAT, where T -> epsilon and p' -L-> p
2402 #
2403 # L is essentially a prefix (which may be empty), T is a suffix that must be
2404 # able to derive an empty string. State p' must lead to state p with the string L.
2405 #
2406 # -----------------------------------------------------------------------------
2408 def compute_lookback_includes(self, C, trans, nullable):
2409 lookdict = {} # Dictionary of lookback relations
2410 includedict = {} # Dictionary of include relations
2412 # Make a dictionary of non-terminal transitions
2413 dtrans = {}
2414 for t in trans:
2415 dtrans[t] = 1
2417 # Loop over all transitions and compute lookbacks and includes
2418 for state, N in trans:
2419 lookb = []
2420 includes = []
2421 for p in C[state]:
2422 if p.name != N:
2423 continue
2425 # Okay, we have a name match. We now follow the production all the way
2426 # through the state machine until we get the . on the right hand side
2428 lr_index = p.lr_index
2429 j = state
2430 while lr_index < p.len - 1:
2431 lr_index = lr_index + 1
2432 t = p.prod[lr_index]
2434 # Check to see if this symbol and state are a non-terminal transition
2435 if (j, t) in dtrans:
2436 # Yes. Okay, there is some chance that this is an includes relation
2437 # the only way to know for certain is to check whether the rest of the
2438 # production derives empty
2440 li = lr_index + 1
2441 while li < p.len:
2442 if p.prod[li] in self.grammar.Terminals:
2443 break # No, forget it
2444 if p.prod[li] not in nullable:
2445 break
2446 li = li + 1
2447 else:
2448 # Appears to be a relation between (j,t) and (state,N)
2449 includes.append((j, t))
2451 g = self.lr0_goto(C[j], t) # Go to next set
2452 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2454 # When we get here, j is the final state; now we have to locate the production
2455 for r in C[j]:
2456 if r.name != p.name:
2457 continue
2458 if r.len != p.len:
2459 continue
2460 i = 0
2461 # This loop is comparing a production ". A B C" with "A B C ."
2462 while i < r.lr_index:
2463 if r.prod[i] != p.prod[i + 1]:
2464 break
2465 i = i + 1
2466 else:
2467 lookb.append((j, r))
2468 for i in includes:
2469 if i not in includedict:
2470 includedict[i] = []
2471 includedict[i].append((state, N))
2472 lookdict[(state, N)] = lookb
2474 return lookdict, includedict
2476 # -----------------------------------------------------------------------------
2477 # compute_read_sets()
2478 #
2479 # Given a set of LR(0) items, this function computes the read sets.
2480 #
2481 # Inputs: C = Set of LR(0) items
2482 # ntrans = Set of nonterminal transitions
2483 # nullable = Set of nullable non-terminals
2484 #
2485 # Returns a dictionary mapping each nonterminal transition to its read set
2486 # -----------------------------------------------------------------------------
2488 def compute_read_sets(self, C, ntrans, nullable):
2489 FP = lambda x: self.dr_relation(C, x, nullable)
2490 R = lambda x: self.reads_relation(C, x, nullable)
2491 F = digraph(ntrans, R, FP)
2492 return F
2494 # -----------------------------------------------------------------------------
2495 # compute_follow_sets()
2496 #
2497 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2498 # and an include set, this function computes the follow sets
2499 #
2500 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2501 #
2502 # Inputs:
2503 # ntrans = Set of nonterminal transitions
2504 # readsets = Readset (previously computed)
2505 # inclsets = Include sets (previously computed)
2506 #
2507 # Returns a dictionary mapping each nonterminal transition to its follow set
2508 # -----------------------------------------------------------------------------
2510 def compute_follow_sets(self, ntrans, readsets, inclsets):
2511 FP = lambda x: readsets[x]
2512 R = lambda x: inclsets.get(x, [])
2513 F = digraph(ntrans, R, FP)
2514 return F
2516 # -----------------------------------------------------------------------------
2517 # add_lookaheads()
2518 #
2519 # Attaches the lookahead symbols to grammar rules.
2520 #
2521 # Inputs: lookbacks - Set of lookback relations
2522 # followset - Computed follow set
2523 #
2524 # This function directly attaches the lookaheads to productions contained
2525 # in the lookbacks set
2526 # -----------------------------------------------------------------------------
2528 def add_lookaheads(self, lookbacks, followset):
2529 for trans, lb in lookbacks.items():
2530 # Loop over productions in lookback
2531 for state, p in lb:
2532 if state not in p.lookaheads:
2533 p.lookaheads[state] = []
2534 f = followset.get(trans, [])
2535 for a in f:
2536 if a not in p.lookaheads[state]:
2537 p.lookaheads[state].append(a)
2539 # -----------------------------------------------------------------------------
2540 # add_lalr_lookaheads()
2541 #
2542 # This function does all of the work of adding lookahead information for use
2543 # with LALR parsing
2544 # -----------------------------------------------------------------------------
2546 def add_lalr_lookaheads(self, C):
2547 # Determine all of the nullable nonterminals
2548 nullable = self.compute_nullable_nonterminals()
2550 # Find all non-terminal transitions
2551 trans = self.find_nonterminal_transitions(C)
2553 # Compute read sets
2554 readsets = self.compute_read_sets(C, trans, nullable)
2556 # Compute lookback/includes relations
2557 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2559 # Compute LALR FOLLOW sets
2560 followsets = self.compute_follow_sets(trans, readsets, included)
2562 # Add all of the lookaheads
2563 self.add_lookaheads(lookd, followsets)
2565 # -----------------------------------------------------------------------------
2566 # lr_parse_table()
2567 #
2568 # This function constructs the parse tables for SLR or LALR
2569 # -----------------------------------------------------------------------------
2570 def lr_parse_table(self):
2571 Productions = self.grammar.Productions
2572 Precedence = self.grammar.Precedence
2573 goto = self.lr_goto # Goto array
2574 action = self.lr_action # Action array
2575 log = self.log # Logger for output
2577 actionp = {} # Action production array (temporary)
2579 log.info("Parsing method: %s", self.lr_method)
2581 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2582 # This determines the number of states
2584 C = self.lr0_items()
2586 if self.lr_method == "LALR":
2587 self.add_lalr_lookaheads(C)
2589 # Build the parser table, state by state
2590 st = 0
2591 for I in C:
2592 # Loop over each production in I
2593 actlist = [] # List of actions
2594 st_action = {}
2595 st_actionp = {}
2596 st_goto = {}
2597 log.info("")
2598 log.info("state %d", st)
2599 log.info("")
2600 for p in I:
2601 log.info(" (%d) %s", p.number, p)
2602 log.info("")
2604 for p in I:
2605 if p.len == p.lr_index + 1:
2606 if p.name == "S'":
2607 # Start symbol. Accept!
2608 st_action["$end"] = 0
2609 st_actionp["$end"] = p
2610 else:
2611 # We are at the end of a production. Reduce!
2612 if self.lr_method == "LALR":
2613 laheads = p.lookaheads[st]
2614 else:
2615 laheads = self.grammar.Follow[p.name]
2616 for a in laheads:
2617 actlist.append((a, p, "reduce using rule %d (%s)" % (p.number, p)))
2618 r = st_action.get(a)
2619 if r is not None:
2620 # Whoa. Have a shift/reduce or reduce/reduce conflict
2621 if r > 0:
2622 # Need to decide on shift or reduce here
2623 # By default we favor shifting. Need to add
2624 # some precedence rules here.
2626 # Shift precedence comes from the token
2627 sprec, slevel = Precedence.get(a, ("right", 0))
2629 # Reduce precedence comes from rule being reduced (p)
2630 rprec, rlevel = Productions[p.number].prec
2632 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == "left")):
2633 # We really need to reduce here.
2634 st_action[a] = -p.number
2635 st_actionp[a] = p
2636 if not slevel and not rlevel:
2637 log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
2638 self.sr_conflicts.append((st, a, "reduce"))
2639 Productions[p.number].reduced += 1
2640 elif (slevel == rlevel) and (rprec == "nonassoc"):
2641 st_action[a] = None
2642 else:
2643 # Hmmm. Guess we'll keep the shift
2644 if not rlevel:
2645 log.info(" ! shift/reduce conflict for %s resolved as shift", a)
2646 self.sr_conflicts.append((st, a, "shift"))
2647 elif r < 0:
2648 # Reduce/reduce conflict. In this case, we favor the rule
2649 # that was defined first in the grammar file
2650 oldp = Productions[-r]
2651 pp = Productions[p.number]
2652 if oldp.line > pp.line:
2653 st_action[a] = -p.number
2654 st_actionp[a] = p
2655 chosenp, rejectp = pp, oldp
2656 Productions[p.number].reduced += 1
2657 Productions[oldp.number].reduced -= 1
2658 else:
2659 chosenp, rejectp = oldp, pp
2660 self.rr_conflicts.append((st, chosenp, rejectp))
2661 log.info(
2662 " ! reduce/reduce conflict for %s resolved using rule %d (%s)",
2663 a,
2664 st_actionp[a].number,
2665 st_actionp[a],
2666 )
2667 else:
2668 raise LALRError("Unknown conflict in state %d" % st)
2669 else:
2670 st_action[a] = -p.number
2671 st_actionp[a] = p
2672 Productions[p.number].reduced += 1
2673 else:
2674 i = p.lr_index
2675 a = p.prod[i + 1] # Get symbol right after the "."
2676 if a in self.grammar.Terminals:
2677 g = self.lr0_goto(I, a)
2678 j = self.lr0_cidhash.get(id(g), -1)
2679 if j >= 0:
2680 # We are in a shift state
2681 actlist.append((a, p, "shift and go to state %d" % j))
2682 r = st_action.get(a)
2683 if r is not None:
2684 # Whoa have a shift/reduce or shift/shift conflict
2685 if r > 0:
2686 if r != j:
2687 raise LALRError("Shift/shift conflict in state %d" % st)
2688 elif r < 0:
2689 # Do a precedence check.
2690 # - if precedence of reduce rule is higher, we reduce.
2691 # - if precedence of reduce is same and left assoc, we reduce.
2692 # - otherwise we shift
2694 # Shift precedence comes from the token
2695 sprec, slevel = Precedence.get(a, ("right", 0))
2697 # Reduce precedence comes from the rule that could have been reduced
2698 rprec, rlevel = Productions[st_actionp[a].number].prec
2700 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == "right")):
2701 # We decide to shift here... highest precedence to shift
2702 Productions[st_actionp[a].number].reduced -= 1
2703 st_action[a] = j
2704 st_actionp[a] = p
2705 if not rlevel:
2706 log.info(" ! shift/reduce conflict for %s resolved as shift", a)
2707 self.sr_conflicts.append((st, a, "shift"))
2708 elif (slevel == rlevel) and (rprec == "nonassoc"):
2709 st_action[a] = None
2710 else:
2711 # Hmmm. Guess we'll keep the reduce
2712 if not slevel and not rlevel:
2713 log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
2714 self.sr_conflicts.append((st, a, "reduce"))
2716 else:
2717 raise LALRError("Unknown conflict in state %d" % st)
2718 else:
2719 st_action[a] = j
2720 st_actionp[a] = p
2722 # Print the actions associated with each terminal
2723 _actprint = {}
2724 for a, p, m in actlist:
2725 if a in st_action:
2726 if p is st_actionp[a]:
2727 log.info(" %-15s %s", a, m)
2728 _actprint[(a, m)] = 1
2729 log.info("")
2730 # Print the actions that were not used. (debugging)
2731 not_used = 0
2732 for a, p, m in actlist:
2733 if a in st_action:
2734 if p is not st_actionp[a]:
2735 if (a, m) not in _actprint:
2736 log.debug(" ! %-15s [ %s ]", a, m)
2737 not_used = 1
2738 _actprint[(a, m)] = 1
2739 if not_used:
2740 log.debug("")
2742 # Construct the goto table for this state
2744 nkeys = {}
2745 for ii in I:
2746 for s in ii.usyms:
2747 if s in self.grammar.Nonterminals:
2748 nkeys[s] = None
2749 for n in nkeys:
2750 g = self.lr0_goto(I, n)
2751 j = self.lr0_cidhash.get(id(g), -1)
2752 if j >= 0:
2753 st_goto[n] = j
2754 log.info(" %-30s shift and go to state %d", n, j)
2756 action[st] = st_action
2757 actionp[st] = st_actionp
2758 goto[st] = st_goto
2759 st += 1
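# Illustration of the precedence-based resolution above (not part of the
# module; names are hypothetical): with
#
#     precedence = (
#         ('left', 'PLUS'),
#         ('left', 'TIMES'),
#     )
#
# the item "expr -> expr PLUS expr ." seen with lookahead TIMES shifts,
# because TIMES sits on a higher precedence level than the rule (whose
# precedence comes from its rightmost terminal, PLUS); with lookahead PLUS
# the levels are equal and the associativity is 'left', so the parser
# reduces instead.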
2761 # -----------------------------------------------------------------------------
2762 # write_table()
2763 #
2764 # This function writes the LR parsing tables to a file
2765 # -----------------------------------------------------------------------------
2767 def write_table(self, tabmodule, outputdir="", signature=""):
2768 if isinstance(tabmodule, types.ModuleType):
2769 raise IOError("Won't overwrite existing tabmodule")
2771 basemodulename = tabmodule.split(".")[-1]
2772 filename = os.path.join(outputdir, basemodulename) + ".py"
2773 try:
2774 f = open(filename, "w")
2776 f.write(
2777 """
2778# %s
2779# This file is automatically generated. Do not edit.
2780# pylint: disable=W,C,R
2781_tabversion = %r
2783_lr_method = %r
2785_lr_signature = %r
2786 """
2787 % (os.path.basename(filename), __tabversion__, self.lr_method, signature)
2788 )
2790 # Change smaller to 0 to go back to original tables
2791 smaller = 1
2793 # Factor out names to try and make smaller
2794 if smaller:
2795 items = {}
2797 for s, nd in self.lr_action.items():
2798 for name, v in nd.items():
2799 i = items.get(name)
2800 if not i:
2801 i = ([], [])
2802 items[name] = i
2803 i[0].append(s)
2804 i[1].append(v)
2806 f.write("\n_lr_action_items = {")
2807 for k, v in items.items():
2808 f.write("%r:([" % k)
2809 for i in v[0]:
2810 f.write("%r," % i)
2811 f.write("],[")
2812 for i in v[1]:
2813 f.write("%r," % i)
2815 f.write("]),")
2816 f.write("}\n")
2818 f.write(
2819 """
2820_lr_action = {}
2821for _k, _v in _lr_action_items.items():
2822 for _x,_y in zip(_v[0],_v[1]):
2823 if not _x in _lr_action: _lr_action[_x] = {}
2824 _lr_action[_x][_k] = _y
2825del _lr_action_items
2826"""
2827 )
2829 else:
2830 f.write("\n_lr_action = { ")
2831 for k, v in self.lr_action.items():
2832 f.write("(%r,%r):%r," % (k[0], k[1], v))
2833 f.write("}\n")
2835 if smaller:
2836 # Factor out names to try and make smaller
2837 items = {}
2839 for s, nd in self.lr_goto.items():
2840 for name, v in nd.items():
2841 i = items.get(name)
2842 if not i:
2843 i = ([], [])
2844 items[name] = i
2845 i[0].append(s)
2846 i[1].append(v)
2848 f.write("\n_lr_goto_items = {")
2849 for k, v in items.items():
2850 f.write("%r:([" % k)
2851 for i in v[0]:
2852 f.write("%r," % i)
2853 f.write("],[")
2854 for i in v[1]:
2855 f.write("%r," % i)
2857 f.write("]),")
2858 f.write("}\n")
2860 f.write(
2861 """
2862_lr_goto = {}
2863for _k, _v in _lr_goto_items.items():
2864 for _x, _y in zip(_v[0], _v[1]):
2865 if not _x in _lr_goto: _lr_goto[_x] = {}
2866 _lr_goto[_x][_k] = _y
2867del _lr_goto_items
2868"""
2869 )
2870 else:
2871 f.write("\n_lr_goto = { ")
2872 for k, v in self.lr_goto.items():
2873 f.write("(%r,%r):%r," % (k[0], k[1], v))
2874 f.write("}\n")
2876 # Write production table
2877 f.write("_lr_productions = [\n")
2878 for p in self.lr_productions:
2879 if p.func:
2880 f.write(
2881 " (%r,%r,%d,%r,%r,%d),\n"
2882 % (p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)
2883 )
2884 else:
2885 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p), p.name, p.len))
2886 f.write("]\n")
2887 f.close()
2889 except IOError:
2890 raise
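# For reference, the module written above has roughly this shape (values
# elided; reconstructed from the writes in write_table()):
#
#     # parsetab.py
#     # This file is automatically generated. Do not edit.
#     _tabversion = '...'
#     _lr_method = 'LALR'
#     _lr_signature = '...'
#     _lr_action_items = {'NUMBER': ([0, 3], [4, 4]), ...}
#     _lr_action = {}  # rebuilt from _lr_action_items at import time
#     _lr_goto_items = {...}
#     _lr_productions = [...]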
2892 # -----------------------------------------------------------------------------
2893 # pickle_table()
2894 #
2895 # This function pickles the LR parsing tables to a supplied file object
2896 # -----------------------------------------------------------------------------
2898 def pickle_table(self, filename, signature=""):
2899 try:
2900 import cPickle as pickle
2901 except ImportError:
2902 import pickle
2903 with open(filename, "wb") as outf:
2904 pickle.dump(__tabversion__, outf, pickle_protocol)
2905 pickle.dump(self.lr_method, outf, pickle_protocol)
2906 pickle.dump(signature, outf, pickle_protocol)
2907 pickle.dump(self.lr_action, outf, pickle_protocol)
2908 pickle.dump(self.lr_goto, outf, pickle_protocol)
2910 outp = []
2911 for p in self.lr_productions:
2912 if p.func:
2913 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2914 else:
2915 outp.append((str(p), p.name, p.len, None, None, None))
2916 pickle.dump(outp, outf, pickle_protocol)
2919# -----------------------------------------------------------------------------
2920# === INTROSPECTION ===
2921#
2922# The following functions and classes are used to implement the PLY
2923# introspection features followed by the yacc() function itself.
2924# -----------------------------------------------------------------------------
2926# -----------------------------------------------------------------------------
2927# get_caller_module_dict()
2928#
2929# This function returns a dictionary containing all of the symbols defined within
2930# a caller further down the call stack. This is used to get the environment
2931# associated with the yacc() call if none was provided.
2932# -----------------------------------------------------------------------------
2935def get_caller_module_dict(levels):
2936 f = sys._getframe(levels)
2937 ldict = f.f_globals.copy()
2938 if f.f_globals != f.f_locals:
2939 ldict.update(f.f_locals)
2940 return ldict
2943# -----------------------------------------------------------------------------
2944# parse_grammar()
2945#
2946# This takes a raw grammar rule string and parses it into production data
2947# -----------------------------------------------------------------------------
2948def parse_grammar(doc, file, line):
2949 grammar = []
2950 # Split the doc string into lines
2951 pstrings = doc.splitlines()
2952 lastp = None
2953 dline = line
2954 for ps in pstrings:
2955 dline += 1
2956 p = ps.split()
2957 if not p:
2958 continue
2959 try:
2960 if p[0] == "|":
2961 # This is a continuation of a previous rule
2962 if not lastp:
2963 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2964 prodname = lastp
2965 syms = p[1:]
2966 else:
2967 prodname = p[0]
2968 lastp = prodname
2969 syms = p[2:]
2970 assign = p[1]
2971 if assign != ":" and assign != "::=":
2972 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2974 grammar.append((file, dline, prodname, syms))
2975 except SyntaxError:
2976 raise
2977 except Exception:
2978 raise SyntaxError("%s:%d: Syntax error in rule %r" % (file, dline, ps.strip()))
2980 return grammar
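# Illustration (not part of the module): a docstring such as
#
#     expression : expression PLUS term
#                | term
#
# yields one (file, line, prodname, syms) tuple per alternative:
#
#     [(file, n,     'expression', ['expression', 'PLUS', 'term']),
#      (file, n + 1, 'expression', ['term'])]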
2983# -----------------------------------------------------------------------------
2984# ParserReflect()
2985#
2986# This class represents information extracted for building a parser including
2987# start symbol, error function, tokens, precedence list, action functions,
2988# etc.
2989# -----------------------------------------------------------------------------
2990class ParserReflect(object):
2991 def __init__(self, pdict, log=None):
2992 self.pdict = pdict
2993 self.start = None
2994 self.error_func = None
2995 self.tokens = None
2996 self.modules = set()
2997 self.grammar = []
2998 self.error = False
3000 if log is None:
3001 self.log = PlyLogger(sys.stderr)
3002 else:
3003 self.log = log
3005 # Get all of the basic information
3006 def get_all(self):
3007 self.get_start()
3008 self.get_error_func()
3009 self.get_tokens()
3010 self.get_precedence()
3011 self.get_pfunctions()
3013 # Validate all of the information
3014 def validate_all(self):
3015 self.validate_start()
3016 self.validate_error_func()
3017 self.validate_tokens()
3018 self.validate_precedence()
3019 self.validate_pfunctions()
3020 self.validate_modules()
3021 return self.error
3023 # Compute a signature over the grammar
3024 def signature(self):
3025 parts = []
3026 try:
3027 if self.start:
3028 parts.append(self.start)
3029 if self.prec:
3030 parts.append("".join(["".join(p) for p in self.prec]))
3031 if self.tokens:
3032 parts.append(" ".join(self.tokens))
3033 for f in self.pfuncs:
3034 if f[3]:
3035 parts.append(f[3])
3036 except (TypeError, ValueError):
3037 pass
3038 return "".join(parts)
3040 # -----------------------------------------------------------------------------
3041 # validate_modules()
3042 #
3043 # This method checks to see if there are duplicated p_rulename() functions
3044 # in the parser module file. Without this function, it is really easy for
3045 # users to make mistakes by cutting and pasting code fragments (and it's a real
3046 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
3047 # we just do a little regular expression pattern matching of def statements
3048 # to try and detect duplicates.
3049 # -----------------------------------------------------------------------------
3051 def validate_modules(self):
3052 # Match def p_funcname(
3053 fre = re.compile(r"\s*def\s+(p_[a-zA-Z_0-9]*)\(")
3055 for module in self.modules:
3056 try:
3057 lines, linen = inspect.getsourcelines(module)
3058 except IOError:
3059 continue
3061 counthash = {}
3062 for linen, line in enumerate(lines):
3063 linen += 1
3064 m = fre.match(line)
3065 if m:
3066 name = m.group(1)
3067 prev = counthash.get(name)
3068 if not prev:
3069 counthash[name] = linen
3070 else:
3071 filename = inspect.getsourcefile(module)
3072 self.log.warning(
3073 "%s:%d: Function %s redefined. Previously defined on line %d",
3074 filename,
3075 linen,
3076 name,
3077 prev,
3078 )
3080 # Get the start symbol
3081 def get_start(self):
3082 self.start = self.pdict.get("start")
3084 # Validate the start symbol
3085 def validate_start(self):
3086 if self.start is not None:
3087 if not isinstance(self.start, string_types):
3088 self.log.error("'start' must be a string")
3090 # Look for error handler
3091 def get_error_func(self):
3092 self.error_func = self.pdict.get("p_error")
3094 # Validate the error function
3095 def validate_error_func(self):
3096 if self.error_func:
3097 if isinstance(self.error_func, types.FunctionType):
3098 ismethod = 0
3099 elif isinstance(self.error_func, types.MethodType):
3100 ismethod = 1
3101 else:
3102 self.log.error("'p_error' defined, but is not a function or method")
3103 self.error = True
3104 return
3106 eline = self.error_func.__code__.co_firstlineno
3107 efile = self.error_func.__code__.co_filename
3108 module = inspect.getmodule(self.error_func)
3109 self.modules.add(module)
3111 argcount = self.error_func.__code__.co_argcount - ismethod
3112 if argcount != 1:
3113 self.log.error("%s:%d: p_error() requires 1 argument", efile, eline)
3114 self.error = True
3116 # Get the tokens map
3117 def get_tokens(self):
3118 tokens = self.pdict.get("tokens")
3119 if not tokens:
3120 self.log.error("No token list is defined")
3121 self.error = True
3122 return
3124 if not isinstance(tokens, (list, tuple)):
3125 self.log.error("tokens must be a list or tuple")
3126 self.error = True
3127 return
3129 if not tokens:
3130 self.log.error("tokens is empty")
3131 self.error = True
3132 return
3134 self.tokens = sorted(tokens)
3136 # Validate the tokens
3137 def validate_tokens(self):
3138 # Validate the tokens.
3139 if "error" in self.tokens:
3140 self.log.error("Illegal token name 'error'. It is a reserved word")
3141 self.error = True
3142 return
3144 terminals = set()
3145 for n in self.tokens:
3146 if n in terminals:
3147 self.log.warning("Token %r multiply defined", n)
3148 terminals.add(n)
3150 # Get the precedence map (if any)
3151 def get_precedence(self):
3152 self.prec = self.pdict.get("precedence")
3154 # Validate and parse the precedence map
3155 def validate_precedence(self):
3156 preclist = []
3157 if self.prec:
3158 if not isinstance(self.prec, (list, tuple)):
3159 self.log.error("precedence must be a list or tuple")
3160 self.error = True
3161 return
3162 for level, p in enumerate(self.prec):
3163 if not isinstance(p, (list, tuple)):
3164 self.log.error("Bad precedence table")
3165 self.error = True
3166 return
3168 if len(p) < 2:
3169 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)", p)
3170 self.error = True
3171 return
3172 assoc = p[0]
3173 if not isinstance(assoc, string_types):
3174 self.log.error("precedence associativity must be a string")
3175 self.error = True
3176 return
3177 for term in p[1:]:
3178 if not isinstance(term, string_types):
3179 self.log.error("precedence items must be strings")
3180 self.error = True
3181 return
3182 preclist.append((term, assoc, level + 1))
3183 self.preclist = preclist
3185 # Get all p_functions from the grammar
3186 def get_pfunctions(self):
3187 p_functions = []
3188 for name, item in self.pdict.items():
3189 if not name.startswith("p_") or name == "p_error":
3190 continue
3191 if isinstance(item, (types.FunctionType, types.MethodType)):
3192 line = getattr(item, "co_firstlineno", item.__code__.co_firstlineno)
3193 module = inspect.getmodule(item)
3194 p_functions.append((line, module, name, item.__doc__))
3196 # Sort all of the actions by line number; make sure to stringify
3197 # modules to make them sortable, since `line` may not uniquely sort all
3198 # p functions
3199 p_functions.sort(
3200 key=lambda p_function: (p_function[0], str(p_function[1]), p_function[2], p_function[3])
3201 )
3202 self.pfuncs = p_functions
3204 # Validate all of the p_functions
3205 def validate_pfunctions(self):
3206 grammar = []
3207 # Check for non-empty symbols
3208 if len(self.pfuncs) == 0:
3209 self.log.error("no rules of the form p_rulename are defined")
3210 self.error = True
3211 return
3213 for line, module, name, doc in self.pfuncs:
3214 file = inspect.getsourcefile(module)
3215 func = self.pdict[name]
3216 if isinstance(func, types.MethodType):
3217 reqargs = 2
3218 else:
3219 reqargs = 1
3220 if func.__code__.co_argcount > reqargs:
3221 self.log.error("%s:%d: Rule %r has too many arguments", file, line, func.__name__)
3222 self.error = True
3223 elif func.__code__.co_argcount < reqargs:
3224 self.log.error("%s:%d: Rule %r requires an argument", file, line, func.__name__)
3225 self.error = True
3226 elif not func.__doc__:
3227 self.log.warning(
3228 "%s:%d: No documentation string specified in function %r (ignored)",
3229 file,
3230 line,
3231 func.__name__,
3232 )
3233 else:
3234 try:
3235 parsed_g = parse_grammar(doc, file, line)
3236 for g in parsed_g:
3237 grammar.append((name, g))
3238 except SyntaxError as e:
3239 self.log.error(str(e))
3240 self.error = True
3242 # Looks like a valid grammar rule.
3243 # Mark the file in which it is defined.
3244 self.modules.add(module)
3246 # Secondary validation step that looks for p_ definitions that are not functions
3247 # or functions that look like they might be grammar rules.
3249 for n, v in self.pdict.items():
3250 if n.startswith("p_") and isinstance(v, (types.FunctionType, types.MethodType)):
3251 continue
3252 if n.startswith("t_"):
3253 continue
3254 if n.startswith("p_") and n != "p_error":
3255 self.log.warning("%r not defined as a function", n)
3256 if (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or (
3257 isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2
3258 ):
3259 if v.__doc__:
3260 try:
3261 doc = v.__doc__.split(" ")
3262 if doc[1] == ":":
3263 self.log.warning(
3264 "%s:%d: Possible grammar rule %r defined without p_ prefix",
3265 v.__code__.co_filename,
3266 v.__code__.co_firstlineno,
3267 n,
3268 )
3269 except IndexError:
3270 pass
3272 self.grammar = grammar
3275# -----------------------------------------------------------------------------
3276# yacc(module)
3277#
3278# Build a parser
3279# -----------------------------------------------------------------------------
3282def yacc(
3283 method="LALR",
3284 debug=yaccdebug,
3285 module=None,
3286 tabmodule=tab_module,
3287 start=None,
3288 check_recursion=True,
3289 optimize=False,
3290 write_tables=True,
3291 debugfile=debug_file,
3292 outputdir=None,
3293 debuglog=None,
3294 errorlog=None,
3295 picklefile=None,
3296):
3297 if tabmodule is None:
3298 tabmodule = tab_module
3300 # Reference to the parsing method of the last built parser
3301 global parse
3303 # If pickling is enabled, table files are not created
3304 if picklefile:
3305 write_tables = 0
3307 if errorlog is None:
3308 errorlog = PlyLogger(sys.stderr)
3310 # Get the module dictionary used for the parser
3311 if module:
3312 _items = [(k, getattr(module, k)) for k in dir(module)]
3313 pdict = dict(_items)
3314 # If no __file__ or __package__ attributes are available, try to obtain them
3315 # from the __module__ instead
3316 if "__file__" not in pdict:
3317 pdict["__file__"] = sys.modules[pdict["__module__"]].__file__
3318 if "__package__" not in pdict and "__module__" in pdict:
3319 if hasattr(sys.modules[pdict["__module__"]], "__package__"):
3320 pdict["__package__"] = sys.modules[pdict["__module__"]].__package__
3321 else:
3322 pdict = get_caller_module_dict(2)
3324 if outputdir is None:
3325 # If no output directory is set, the location of the output files
3326 # is determined according to the following rules:
3327 # - If tabmodule specifies a package, files go into that package directory
3328 # - Otherwise, files go in the same directory as the specifying module
3329 if isinstance(tabmodule, types.ModuleType):
3330 srcfile = tabmodule.__file__
3331 else:
3332 if "." not in tabmodule:
3333 srcfile = pdict["__file__"]
3334 else:
3335 parts = tabmodule.split(".")
3336 pkgname = ".".join(parts[:-1])
3337 exec("import %s" % pkgname)
3338 srcfile = getattr(sys.modules[pkgname], "__file__", "")
3339 outputdir = os.path.dirname(srcfile)
3341 # Determine if the module is part of a package or not.
3342 # If so, fix the tabmodule setting so that tables load correctly
3343 pkg = pdict.get("__package__")
3344 if pkg and isinstance(tabmodule, str):
3345 if "." not in tabmodule:
3346 tabmodule = pkg + "." + tabmodule
3348 # Set start symbol if it's specified directly using an argument
3349 if start is not None:
3350 pdict["start"] = start
3352 # Collect parser information from the dictionary
3353 pinfo = ParserReflect(pdict, log=errorlog)
3354 pinfo.get_all()
3356 if pinfo.error:
3357 raise YaccError("Unable to build parser")
3359 # Check signature against table files (if any)
3360 signature = pinfo.signature()
3362 # Read the tables
3363 try:
3364 lr = LRTable()
3365 if picklefile:
3366 read_signature = lr.read_pickle(picklefile)
3367 else:
3368 read_signature = lr.read_table(tabmodule)
3369 if optimize or (read_signature == signature):
3370 try:
3371 lr.bind_callables(pinfo.pdict)
3372 parser = LRParser(lr, pinfo.error_func)
3373 parse = parser.parse
3374 return parser
3375 except Exception as e:
3376 errorlog.warning("There was a problem loading the table file: %r", e)
3377 except VersionError as e:
3378 errorlog.warning(str(e))
3379 except ImportError:
3380 pass
3382 if debuglog is None:
3383 if debug:
3384 try:
3385 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), "w"))
3386 except IOError as e:
3387 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3388 debuglog = NullLogger()
3389 else:
3390 debuglog = NullLogger()
3392 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3394 errors = False
3396 # Validate the parser information
3397 if pinfo.validate_all():
3398 raise YaccError("Unable to build parser")
3400 if not pinfo.error_func:
3401 errorlog.warning("no p_error() function is defined")
3403 # Create a grammar object
3404 grammar = Grammar(pinfo.tokens)
3406 # Set precedence level for terminals
3407 for term, assoc, level in pinfo.preclist:
3408 try:
3409 grammar.set_precedence(term, assoc, level)
3410 except GrammarError as e:
3411 errorlog.warning("%s", e)
3413 # Add productions to the grammar
3414 for funcname, gram in pinfo.grammar:
3415 file, line, prodname, syms = gram
3416 try:
3417 grammar.add_production(prodname, syms, funcname, file, line)
3418 except GrammarError as e:
3419 errorlog.error("%s", e)
3420 errors = True
3422 # Set the grammar start symbols
3423 try:
3424 if start is None:
3425 grammar.set_start(pinfo.start)
3426 else:
3427 grammar.set_start(start)
3428 except GrammarError as e:
3429 errorlog.error(str(e))
3430 errors = True
3432 if errors:
3433 raise YaccError("Unable to build parser")
3435 # Verify the grammar structure
3436 undefined_symbols = grammar.undefined_symbols()
3437 for sym, prod in undefined_symbols:
3438 errorlog.error(
3439 "%s:%d: Symbol %r used, but not defined as a token or a rule", prod.file, prod.line, sym
3440 )
3441 errors = True
3443 unused_terminals = grammar.unused_terminals()
3444 if unused_terminals:
3445 debuglog.info("")
3446 debuglog.info("Unused terminals:")
3447 debuglog.info("")
3448 for term in unused_terminals:
3449 errorlog.warning("Token %r defined, but not used", term)
3450 debuglog.info(" %s", term)
3452 # Print out all productions to the debug log
3453 if debug:
3454 debuglog.info("")
3455 debuglog.info("Grammar")
3456 debuglog.info("")
3457 for n, p in enumerate(grammar.Productions):
3458 debuglog.info("Rule %-5d %s", n, p)
3460 # Find unused non-terminals
3461 unused_rules = grammar.unused_rules()
3462 for prod in unused_rules:
3463 errorlog.warning("%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name)
3465 if len(unused_terminals) == 1:
3466 errorlog.warning("There is 1 unused token")
3467 if len(unused_terminals) > 1:
3468 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3470 if len(unused_rules) == 1:
3471 errorlog.warning("There is 1 unused rule")
3472 if len(unused_rules) > 1:
3473 errorlog.warning("There are %d unused rules", len(unused_rules))
3475 if debug:
3476 debuglog.info("")
3477 debuglog.info("Terminals, with rules where they appear")
3478 debuglog.info("")
3479 terms = list(grammar.Terminals)
3480 terms.sort()
3481 for term in terms:
3482 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3484 debuglog.info("")
3485 debuglog.info("Nonterminals, with rules where they appear")
3486 debuglog.info("")
3487 nonterms = list(grammar.Nonterminals)
3488 nonterms.sort()
3489 for nonterm in nonterms:
3490 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3491 debuglog.info("")
3493 if check_recursion:
3494 unreachable = grammar.find_unreachable()
3495 for u in unreachable:
3496 errorlog.warning("Symbol %r is unreachable", u)
3498 infinite = grammar.infinite_cycles()
3499 for inf in infinite:
3500 errorlog.error("Infinite recursion detected for symbol %r", inf)
3501 errors = True
3503 unused_prec = grammar.unused_precedence()
3504 for term, assoc in unused_prec:
3505 errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term)
3506 errors = True
3508 if errors:
3509 raise YaccError("Unable to build parser")
3511 # Run the LRGeneratedTable on the grammar
3512 if debug:
3513 errorlog.debug("Generating %s tables", method)
3515 lr = LRGeneratedTable(grammar, method, debuglog)
3517 if debug:
3518 num_sr = len(lr.sr_conflicts)
3520 # Report shift/reduce and reduce/reduce conflicts
3521 if num_sr == 1:
3522 errorlog.warning("1 shift/reduce conflict")
3523 elif num_sr > 1:
3524 errorlog.warning("%d shift/reduce conflicts", num_sr)
3526 num_rr = len(lr.rr_conflicts)
3527 if num_rr == 1:
3528 errorlog.warning("1 reduce/reduce conflict")
3529 elif num_rr > 1:
3530 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3532 # Write out conflicts to the output file
3533 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3534 debuglog.warning("")
3535 debuglog.warning("Conflicts:")
3536 debuglog.warning("")
3538 for state, tok, resolution in lr.sr_conflicts:
3539 debuglog.warning(
3540 "shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution
3541 )
3543 already_reported = set()
3544 for state, rule, rejected in lr.rr_conflicts:
3545 if (state, id(rule), id(rejected)) in already_reported:
3546 continue
3547 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3548 debuglog.warning("rejected rule (%s) in state %d", rejected, state)
3549 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3550 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3551 already_reported.add((state, id(rule), id(rejected)))
3553 warned_never = []
3554 for state, rule, rejected in lr.rr_conflicts:
3555 if not rejected.reduced and (rejected not in warned_never):
3556 debuglog.warning("Rule (%s) is never reduced", rejected)
3557 errorlog.warning("Rule (%s) is never reduced", rejected)
3558 warned_never.append(rejected)
3560 # Write the table file if requested
3561 if write_tables:
3562 try:
3563 lr.write_table(tabmodule, outputdir, signature)
3564 if tabmodule in sys.modules:
3565 del sys.modules[tabmodule]
3566 except IOError as e:
3567 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3569 # Write a pickled version of the tables
3570 if picklefile:
3571 try:
3572 lr.pickle_table(picklefile, signature)
3573 except IOError as e:
3574 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3576 # Build the parser
3577 lr.bind_callables(pinfo.pdict)
3578 parser = LRParser(lr, pinfo.error_func)
3580 parse = parser.parse
3581 return parser
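# -----------------------------------------------------------------------------
# Typical usage (a minimal sketch; 'calclex' and the rule names below are
# hypothetical, and the import path depends on where this module lives):
#
#     import ply.yacc as yacc
#     from calclex import tokens   # lexer module defining the token list
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expression_term(p):
#         'expression : term'
#         p[0] = p[1]
#
#     def p_term_number(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print("Syntax error at %r" % (p,))
#
#     parser = yacc.yacc()
#     result = parser.parse('2 + 3')
# -----------------------------------------------------------------------------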