WIP: add a more comfortable way of adding grammars #4

Open · wants to merge 3 commits into main
40 changes: 34 additions & 6 deletions parsing/__init__.py
@@ -123,7 +123,7 @@
 """
 __all__ = ["SpecError", "UnexpectedToken", "Nonterm",
            "Precedence", "Spec", "Token", "Lr", "Glr",
-           "ModuleSpecSource"]
+           "ModuleSpecSource", "Grammar"]
 
 from six import print_
 from six.moves import range
@@ -137,6 +137,7 @@
 from parsing.ast import Symbol, Nonterm, Token  # noqa
 from parsing.automaton import Spec
 from parsing.module_spec import ModuleSpecSource
+from parsing.class_spec import Grammar
 
 # Exception aliases for legacy code that needs the old names that
 # shadow builtin exceptions
@@ -203,9 +204,10 @@ def reset(self):
         self._start = None
         self._stack = [(Epsilon(self), 0)]
 
-    def token(self, token):
+    def token(self, token, tokenSpec=None):
         """Feed a token to the parser."""
-        tokenSpec = self._spec._sym2spec[type(token)]
+        if tokenSpec is None:
+            tokenSpec = self._spec._sym2spec[type(token)]
         self._act(token, tokenSpec)
 
     def eoi(self):
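The optional tokenSpec parameter lets a caller bypass the type-based lookup in _sym2spec. That matters once many token instances share a single Python class (such as ASTToken, added in parsing/ast.py below): the token's type alone no longer identifies its spec. A minimal sketch of feeding a token with an explicit spec; `parser` (an Lr instance) and `num_spec` (a TokenSpec) are assumptions, not part of this diff:

    # Hypothetical setup: `parser` is an Lr, `num_spec` the TokenSpec of a
    # 'num' terminal; ASTToken is defined in parsing/ast.py below.
    tok = ASTToken(parser, num_spec, word='42', range=[0, 2], val=42)
    parser.token(tok, tokenSpec=num_spec)  # skips _sym2spec[type(tok)]
    parser.eoi()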
@@ -230,6 +232,8 @@ def _act(self, sym, symSpec):
         while True:
             top = self._stack[-1]
             if symSpec not in self._spec._action[top[1]]:
+                for k in self._spec._action[top[1]]:
+                    print("K:", repr(k), id(k))
                 raise UnexpectedToken("Unexpected token: %r" % sym)
 
             actions = self._spec._action[top[1]][symSpec]
@@ -275,7 +279,30 @@ def _reduce(self, production):
         self._stack.append((r, self._spec._goto[top[1]][production.lhs]))
 
     def _production(self, production, rhs):
-        sym = production.lhs.nontermType(self)
+        sym = production.lhs.nontermType(self, production.lhs)
+        sym.type = production.lhs.name
+        if rhs:
+            try:
+                first_idx = 0
+                last_idx = len(rhs) - 1
+                # skip epsilon productions, look into lists (for x* and x+)
+                while last_idx >= first_idx and not rhs[last_idx]:
+                    last_idx -= 1
+                while last_idx >= first_idx and not rhs[first_idx]:
+                    first_idx += 1
+                if last_idx >= first_idx:
+                    last_rhs = rhs[last_idx]
+                    if isinstance(last_rhs, list):
+                        last_rhs = last_rhs[-1]
+                    first_rhs = rhs[first_idx]
+                    if isinstance(first_rhs, list):
+                        first_rhs = first_rhs[0]
+                    if first_rhs.range is not None and last_rhs.range is not None:
+                        sym.range = [first_rhs.range[0], last_rhs.range[1]]
+                    else:
+                        sym.range = None
+            except AttributeError:
+                pass
         nRhs = len(rhs)
         assert nRhs == len(production.rhs)
         r = production.method(sym, *rhs)
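The added block gives each reduced nonterminal a source range spanning from the first non-empty right-hand-side element to the last one, skipping epsilon slots and looking inside the lists produced by x* and x+ items. A self-contained sketch of the same rule, with plain dicts standing in for symbols:

    # Mirrors the span rule in _production() above; dicts replace symbols.
    def combine_ranges(rhs):
        items = [x for x in rhs if x]        # skip epsilon (falsy) slots
        if not items:
            return None
        first, last = items[0], items[-1]
        if isinstance(first, list):          # x* and x+ produce lists
            first = first[0]
        if isinstance(last, list):
            last = last[-1]
        if first['range'] is None or last['range'] is None:
            return None
        return [first['range'][0], last['range'][1]]

    print(combine_ranges([{'range': [0, 3]}, {'range': [4, 9]}, None]))  # [0, 9]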
@@ -406,14 +433,15 @@ def reset(self):
 
         self._paths = []
 
-    def token(self, token):
+    def token(self, token, tokenSpec=None):
         """
         Feed a token to the parser.
         """
         if self._verbose:
             print_("%s" % ("-" * 80))
             print_("INPUT: %r" % token)
-        tokenSpec = self._spec._sym2spec[type(token)]
+        if tokenSpec is None:
+            tokenSpec = self._spec._sym2spec[type(token)]
         self._act(token, tokenSpec)
         if len(self._gss) == 0:
             raise UnexpectedToken("Unexpected token: %r" % token)
208 changes: 202 additions & 6 deletions parsing/ast.py
@@ -6,7 +6,8 @@
 """
 
 from parsing.interfaces import is_parser, is_symspec
-
+from parsing.errors import SpecError
+from re import compile as re_compile, escape as re_escape
 
 class Symbol(object):
     def __init__(self, symSpec, parser):
@@ -79,9 +80,11 @@ def reduceB(self, id):
            "%reduce id"
     """
 
-    def __init__(self, parser):
+    def __init__(self, parser, symSpec=None):
         assert is_parser(parser)
-        Symbol.__init__(self, parser._spec._sym2spec[type(self)], parser)
+        if symSpec is None:
+            symSpec = parser._spec._sym2spec[type(self)]
+        Symbol.__init__(self, symSpec, parser)
 
     def merge(self, other):
         """
@@ -140,7 +143,200 @@ class id(Token):
            "%token"
     """
 
-    def __init__(self, parser):
-        assert is_parser(parser)
-        Symbol.__init__(self, parser._spec._sym2spec[type(self)], parser)
+    def __init__(self, parser, spec=None):
+        assert is_parser(parser), parser
+        if spec is None:
+            spec = parser._spec._sym2spec[type(self)]
+        Symbol.__init__(self, spec, parser)
+        self.__parser = parser
+
+NOT_SET = object()
+
+
+class ASTToken(Token):
+    def __init__(self, parser, spec, word, range, val=NOT_SET, **kwargs):
+        Token.__init__(self, parser, spec)
+        self.type = spec.name
+        self.word = word
+        if val is NOT_SET:
+            self.val = word
+        else:
+            self.val = val
+        self.range = range
+        self.__dict__.update(kwargs)
+
+    def __repr__(self):
+        return '%s[%d-%d,word=%s,val=%r]' % (
+            self.symSpec.name, self.range[0], self.range[1],
+            self.word, self.val)
+
+class TokenBuilder(object):
+    """
+    Carries the information needed to recognize and build a token.
+    """
+    def __init__(self, token_re, prec='none', convert=None, factory=None,
+                 keyword=None, name=None):
+        self._re = token_re
+        self._prec = prec
+        self.convert = convert
+        if factory is None:
+            self.factory = ASTToken
+        else:
+            self.factory = factory
+        self.keyword = keyword
+        self.name = name
+
+    def __hash__(self):
+        return hash((self._re, self.name))
+
+    def __eq__(self, other):
+        """
+        Structural equality of two TokenBuilder objects, as needed for
+        unpickling.
+        """
+        return self.__dict__ == other.__dict__
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __call__(self, parser, symSpec, word=None, range=None, **kwargs):
+        if self.convert is None:
+            val = word
+        else:
+            val = self.convert(word)
+        return self.factory(parser, symSpec, word, range, val=val, **kwargs)
+
+
+def is_token_factory(tokenType):
+    if isinstance(tokenType, type) and issubclass(tokenType, Token):
+        return True
+    if isinstance(tokenType, TokenBuilder):
+        return True
+    return False
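Calling a TokenBuilder runs its convert function on the matched word and delegates to the factory (ASTToken by default). A hedged sketch of that call protocol; `parser` and `num_spec` are again assumed to exist, not taken from this diff:

    tb = TokenBuilder('[0-9]+', convert=int, name='num')
    tok = tb(parser, num_spec, word='42', range=[0, 2])
    # tok is an ASTToken with tok.word == '42' and tok.val == 42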

+def mktoken(name, prec='none', re=None, s=None, tokens=None, keyword=None,
+            between=None, escape=None, convert=None):
+    """
+    Creates a token builder (which is then turned into a TokenSpec), i.e.
+    this is a Token factory factory.
+
+    :param name: the name of the token class
+    :param prec: the precedence
+    :param re: a regular expression describing the token
+    :param s: a fixed string for the token
+    :param tokens: a string containing a space-separated list of matching tokens
+    :param keyword: a keyword is a string that is also matched by another RE
+    :param between: a two-item sequence (starter, ender); the token is the
+        text enclosed between them
+    :param escape: currently unused
+    :param convert: function mapping the matched text to the token's
+        semantic value
+    :return: a TokenBuilder for the described token
+    """
+    token_re = None
+    if re is not None:
+        token_re = re
+    elif s is not None:
+        token_re = re_escape(s)
+    elif tokens is not None:
+        token_re = '(?:%s)' % (
+            '|'.join([re_escape(tok) for tok in tokens.split()]))
+    elif between is not None:
+        if len(between) != 2:
+            raise SpecError("Need exactly two items for between: %s"
+                            % (between,))
+        starter, ender = between
+        not_enders = []
+        for i in range(len(ender)):
+            not_enders.append('{}[^{}]'.format(
+                re_escape(ender[:i]), re_escape(ender[i])))
+        token_re = '{}(?:{})*{}'.format(
+            re_escape(starter),
+            '|'.join([x for x in not_enders]),
+            re_escape(ender))
+        # print(token_re)
+        if convert is None:
+            def my_convert(s):
+                assert s.startswith(starter) and s.endswith(ender)
+                return s[len(starter):-len(ender)]
+            convert = my_convert
+    else:
+        token_re = None
+
+    return TokenBuilder(token_re, prec, convert, keyword=keyword, name=name)
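A usage sketch for the four ways mktoken can describe a terminal. These calls only construct TokenBuilder objects, so they need no parser; the names are illustrative, not taken from this PR:

    num    = mktoken('num', re='[0-9]+', convert=int)  # regular expression
    plus   = mktoken('plus', s='+')                    # one fixed string
    op     = mktoken('op', tokens='+ - * /')           # any of several strings
    string = mktoken('string', between='""')           # delimited; quotes stripped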

+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+
+def print_ast(nonterm, indent=0, attribute_order=None):
+    assert isinstance(nonterm, Symbol), type(nonterm)
+    s_indent = ' ' * indent
+    if isinstance(nonterm, Nonterm):
+        if hasattr(nonterm, 'type'):
+            attr_type = nonterm.type
+        else:
+            attr_type = None
+        cls_type = type(nonterm).__name__
+        if attr_type != cls_type:
+            type_expr = '%s%s%s[%s]' % (
+                bcolors.BOLD, attr_type, bcolors.ENDC, cls_type)
+        else:
+            type_expr = '%s%s%s' % (bcolors.BOLD, attr_type, bcolors.ENDC)
+        if hasattr(nonterm, 'range'):
+            nt_range = nonterm.range
+            range_expr = '%d-%d' % (nt_range[0], nt_range[1])
+        else:
+            range_expr = '??-??'
+        print("%s%s %s" % (
+            s_indent, type_expr, range_expr))
+    else:
+        if nonterm.word != nonterm.val:
+            val_expr = "%r '%s'" % (nonterm.val, nonterm.word)
+        else:
+            val_expr = "'%s'" % (nonterm.word,)
+        if hasattr(nonterm, 'range'):
+            nt_range = nonterm.range
+            range_expr = '%d-%d' % (nt_range[0], nt_range[1])
+        else:
+            range_expr = '??-??'
+        print("%s%s%s%s[%s] %s %s" % (
+            s_indent,
+            bcolors.BOLD, nonterm.type, bcolors.ENDC,
+            type(nonterm).__name__,
+            range_expr,
+            val_expr))
+    d = nonterm.__dict__
+
+    def print_attribute(k):
+        v = getattr(nonterm, k)
+        if isinstance(v, Symbol):
+            print('%s %s:' % (s_indent, k))
+            print_ast(v, indent + 4, attribute_order)
+        elif isinstance(v, list):
+            print('%s %s:' % (s_indent, k))
+            for val in v:
+                if isinstance(val, Symbol):
+                    print_ast(val, indent + 4, attribute_order)
+                else:
+                    print('%s - %r' % (s_indent, val))
+        elif isinstance(v, dict):
+            print('%s %s:' % (s_indent, k))
+            for key in v:
+                val = v[key]
+                if isinstance(val, Symbol):
+                    print('%s [%s]' % (s_indent, key))
+                    print_ast(val, indent + 6, attribute_order)
+                else:
+                    print('%s [%s] %r' % (s_indent, key, val))
+
+    if attribute_order is not None:
+        for k in attribute_order:
+            if k in d:
+                print_attribute(k)
+    for k in sorted(d.keys()):
+        if k[0] != '_' and k not in ['type', 'range'] and (
+                attribute_order is None or k not in attribute_order):
+            print_attribute(k)
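print_ast renders a parse result as an indented tree: nonterminals print their type and range, tokens their value and source word, and public attributes other than type and range are recursed into, optionally in a caller-supplied order first. A hedged usage sketch; how the root symbol is obtained from a finished parser is an assumption here, not shown in this diff:

    root = parser.start[0]  # hypothetical handle on the start symbol
    print_ast(root, attribute_order=['lhs', 'op', 'rhs'])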
