WIP: add a more comfortable way of adding grammars #4

Open · wants to merge 3 commits into main
40 changes: 34 additions & 6 deletions parsing/__init__.py
@@ -123,7 +123,7 @@
 """
 __all__ = ["SpecError", "UnexpectedToken", "Nonterm",
            "Precedence", "Spec", "Token", "Lr", "Glr",
-           "ModuleSpecSource"]
+           "ModuleSpecSource", "Grammar"]
 
 from six import print_
 from six.moves import range
@@ -137,6 +137,7 @@
 from parsing.ast import Symbol, Nonterm, Token  # noqa
 from parsing.automaton import Spec
 from parsing.module_spec import ModuleSpecSource
+from parsing.class_spec import Grammar
 
 # Exception aliases for legacy code that needs the old names that
 # shadow builtin exceptions
@@ -203,9 +204,10 @@ def reset(self):
         self._start = None
         self._stack = [(Epsilon(self), 0)]
 
-    def token(self, token):
+    def token(self, token, tokenSpec=None):
         """Feed a token to the parser."""
-        tokenSpec = self._spec._sym2spec[type(token)]
+        if tokenSpec is None:
+            tokenSpec = self._spec._sym2spec[type(token)]
         self._act(token, tokenSpec)
 
     def eoi(self):
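The optional tokenSpec parameter lets a caller bypass the type-based lookup in _sym2spec. That matters once many token instances share a single Python class (such as ASTToken, added in parsing/ast.py below): the token's type alone no longer identifies its spec. A minimal sketch of feeding a token with an explicit spec; `parser` (an Lr instance) and `num_spec` (a TokenSpec) are assumptions, not part of this diff:

    # Hypothetical setup: `parser` is an Lr, `num_spec` the TokenSpec of a
    # 'num' terminal; ASTToken is defined in parsing/ast.py below.
    tok = ASTToken(parser, num_spec, word='42', range=[0, 2], val=42)
    parser.token(tok, tokenSpec=num_spec)  # skips _sym2spec[type(tok)]
    parser.eoi()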
@@ -230,6 +232,8 @@ def _act(self, sym, symSpec):
         while True:
             top = self._stack[-1]
             if symSpec not in self._spec._action[top[1]]:
+                for k in self._spec._action[top[1]]:
+                    print("K:", repr(k), id(k))
                 raise UnexpectedToken("Unexpected token: %r" % sym)
 
             actions = self._spec._action[top[1]][symSpec]
@@ -275,7 +279,30 @@ def _reduce(self, production):
         self._stack.append((r, self._spec._goto[top[1]][production.lhs]))
 
     def _production(self, production, rhs):
-        sym = production.lhs.nontermType(self)
+        sym = production.lhs.nontermType(self, production.lhs)
+        sym.type = production.lhs.name
+        if rhs:
+            try:
+                first_idx = 0
+                last_idx = len(rhs) - 1
+                # skip epsilon productions, look into lists (for x* and x+)
+                while last_idx >= first_idx and not rhs[last_idx]:
+                    last_idx -= 1
+                while last_idx >= first_idx and not rhs[first_idx]:
+                    first_idx += 1
+                if last_idx >= first_idx:
+                    last_rhs = rhs[last_idx]
+                    if isinstance(last_rhs, list):
+                        last_rhs = last_rhs[-1]
+                    first_rhs = rhs[first_idx]
+                    if isinstance(first_rhs, list):
+                        first_rhs = first_rhs[0]
+                    if first_rhs.range is not None and last_rhs.range is not None:
+                        sym.range = [first_rhs.range[0], last_rhs.range[1]]
+                    else:
+                        sym.range = None
+            except AttributeError:
+                pass
         nRhs = len(rhs)
         assert nRhs == len(production.rhs)
         r = production.method(sym, *rhs)
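The added block gives each reduced nonterminal a source range spanning from the first non-empty right-hand-side element to the last one, skipping epsilon slots and looking inside the lists produced by x* and x+ items. A self-contained sketch of the same rule, with plain dicts standing in for symbols:

    # Mirrors the span rule in _production() above; dicts replace symbols.
    def combine_ranges(rhs):
        items = [x for x in rhs if x]        # skip epsilon (falsy) slots
        if not items:
            return None
        first, last = items[0], items[-1]
        if isinstance(first, list):          # x* and x+ produce lists
            first = first[0]
        if isinstance(last, list):
            last = last[-1]
        if first['range'] is None or last['range'] is None:
            return None
        return [first['range'][0], last['range'][1]]

    print(combine_ranges([{'range': [0, 3]}, {'range': [4, 9]}, None]))  # [0, 9]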
@@ -406,14 +433,15 @@ def reset(self):
 
         self._paths = []
 
-    def token(self, token):
+    def token(self, token, tokenSpec=None):
         """
         Feed a token to the parser.
         """
         if self._verbose:
             print_("%s" % ("-" * 80))
             print_("INPUT: %r" % token)
-        tokenSpec = self._spec._sym2spec[type(token)]
+        if tokenSpec is None:
+            tokenSpec = self._spec._sym2spec[type(token)]
         self._act(token, tokenSpec)
         if len(self._gss) == 0:
             raise UnexpectedToken("Unexpected token: %r" % token)
208 changes: 202 additions & 6 deletions parsing/ast.py
@@ -6,7 +6,8 @@
 """
 
 from parsing.interfaces import is_parser, is_symspec
-
+from parsing.errors import SpecError
+from re import compile as re_compile, escape as re_escape
 
 class Symbol(object):
     def __init__(self, symSpec, parser):
@@ -79,9 +80,11 @@ def reduceB(self, id):
            "%reduce id"
     """
 
-    def __init__(self, parser):
+    def __init__(self, parser, symSpec=None):
         assert is_parser(parser)
-        Symbol.__init__(self, parser._spec._sym2spec[type(self)], parser)
+        if symSpec is None:
+            symSpec = parser._spec._sym2spec[type(self)]
+        Symbol.__init__(self, symSpec, parser)
 
     def merge(self, other):
         """
@@ -140,7 +143,200 @@ class id(Token):
            "%token"
     """
 
-    def __init__(self, parser):
-        assert is_parser(parser)
-        Symbol.__init__(self, parser._spec._sym2spec[type(self)], parser)
+    def __init__(self, parser, spec=None):
+        assert is_parser(parser), parser
+        if spec is None:
+            spec = parser._spec._sym2spec[type(self)]
+        Symbol.__init__(self, spec, parser)
+        self.__parser = parser
+
+NOT_SET = object()
+
+
+class ASTToken(Token):
+    def __init__(self, parser, spec, word, range, val=NOT_SET, **kwargs):
+        Token.__init__(self, parser, spec)
+        self.type = spec.name
+        self.word = word
+        if val is NOT_SET:
+            self.val = word
+        else:
+            self.val = val
+        self.range = range
+        self.__dict__.update(kwargs)
+
+    def __repr__(self):
+        return '%s[%d-%d,word=%s,val=%r]' % (
+            self.symSpec.name, self.range[0], self.range[1],
+            self.word, self.val)
+
+class TokenBuilder(object):
+    """
+    Carries the information needed to recognize and build a token.
+    """
+    def __init__(self, token_re, prec='none', convert=None, factory=None,
+                 keyword=None, name=None):
+        self._re = token_re
+        self._prec = prec
+        self.convert = convert
+        if factory is None:
+            self.factory = ASTToken
+        else:
+            self.factory = factory
+        self.keyword = keyword
+        self.name = name
+
+    def __hash__(self):
+        return hash((self._re, self.name))
+
+    def __eq__(self, other):
+        """
+        Structural equality of two TokenBuilder objects, as needed for
+        unpickling.
+        """
+        return self.__dict__ == other.__dict__
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __call__(self, parser, symSpec, word=None, range=None, **kwargs):
+        if self.convert is None:
+            val = word
+        else:
+            val = self.convert(word)
+        return self.factory(parser, symSpec, word, range, val=val, **kwargs)
+
+
+def is_token_factory(tokenType):
+    if isinstance(tokenType, type) and issubclass(tokenType, Token):
+        return True
+    if isinstance(tokenType, TokenBuilder):
+        return True
+    return False
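Calling a TokenBuilder runs its convert function on the matched word and delegates to the factory (ASTToken by default). A hedged sketch of that call protocol; `parser` and `num_spec` are again assumed to exist, not taken from this diff:

    tb = TokenBuilder('[0-9]+', convert=int, name='num')
    tok = tb(parser, num_spec, word='42', range=[0, 2])
    # tok is an ASTToken with tok.word == '42' and tok.val == 42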

+def mktoken(name, prec='none', re=None, s=None, tokens=None, keyword=None,
+            between=None, escape=None, convert=None):
+    """
+    Creates a token builder (which is then turned into a TokenSpec), i.e.
+    this is a Token factory factory.
+
+    :param name: the name of the token class
+    :param prec: the precedence
+    :param re: a regular expression describing the token
+    :param s: a fixed string for the token
+    :param tokens: a string containing a space-separated list of matching tokens
+    :param keyword: a keyword is a string that is also matched by another RE
+    :param between: a two-item sequence (starter, ender); the token is the
+        text enclosed between them
+    :param escape: currently unused
+    :param convert: function mapping the matched text to the token's
+        semantic value
+    :return: a TokenBuilder for the described token
+    """
+    token_re = None
+    if re is not None:
+        token_re = re
+    elif s is not None:
+        token_re = re_escape(s)
+    elif tokens is not None:
+        token_re = '(?:%s)' % (
+            '|'.join([re_escape(tok) for tok in tokens.split()]))
+    elif between is not None:
+        if len(between) != 2:
+            raise SpecError("Need exactly two items for between: %s"
+                            % (between,))
+        starter, ender = between
+        not_enders = []
+        for i in range(len(ender)):
+            not_enders.append('{}[^{}]'.format(
+                re_escape(ender[:i]), re_escape(ender[i])))
+        token_re = '{}(?:{})*{}'.format(
+            re_escape(starter),
+            '|'.join([x for x in not_enders]),
+            re_escape(ender))
+        # print(token_re)
+        if convert is None:
+            def my_convert(s):
+                assert s.startswith(starter) and s.endswith(ender)
+                return s[len(starter):-len(ender)]
+            convert = my_convert
+    else:
+        token_re = None
+
+    return TokenBuilder(token_re, prec, convert, keyword=keyword, name=name)
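A usage sketch for the four ways mktoken can describe a terminal. These calls only construct TokenBuilder objects, so they need no parser; the names are illustrative, not taken from this PR:

    num    = mktoken('num', re='[0-9]+', convert=int)  # regular expression
    plus   = mktoken('plus', s='+')                    # one fixed string
    op     = mktoken('op', tokens='+ - * /')           # any of several strings
    string = mktoken('string', between='""')           # delimited; quotes stripped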

+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+
+def print_ast(nonterm, indent=0, attribute_order=None):
+    assert isinstance(nonterm, Symbol), type(nonterm)
+    s_indent = ' ' * indent
+    if isinstance(nonterm, Nonterm):
+        if hasattr(nonterm, 'type'):
+            attr_type = nonterm.type
+        else:
+            attr_type = None
+        cls_type = type(nonterm).__name__
+        if attr_type != cls_type:
+            type_expr = '%s%s%s[%s]' % (
+                bcolors.BOLD, attr_type, bcolors.ENDC, cls_type)
+        else:
+            type_expr = '%s%s%s' % (bcolors.BOLD, attr_type, bcolors.ENDC)
+        if hasattr(nonterm, 'range'):
+            nt_range = nonterm.range
+            range_expr = '%d-%d' % (nt_range[0], nt_range[1])
+        else:
+            range_expr = '??-??'
+        print("%s%s %s" % (
+            s_indent, type_expr, range_expr))
+    else:
+        if nonterm.word != nonterm.val:
+            val_expr = "%r '%s'" % (nonterm.val, nonterm.word)
+        else:
+            val_expr = "'%s'" % (nonterm.word,)
+        if hasattr(nonterm, 'range'):
+            nt_range = nonterm.range
+            range_expr = '%d-%d' % (nt_range[0], nt_range[1])
+        else:
+            range_expr = '??-??'
+        print("%s%s%s%s[%s] %s %s" % (
+            s_indent,
+            bcolors.BOLD, nonterm.type, bcolors.ENDC,
+            type(nonterm).__name__,
+            range_expr,
+            val_expr))
+    d = nonterm.__dict__
+
+    def print_attribute(k):
+        v = getattr(nonterm, k)
+        if isinstance(v, Symbol):
+            print('%s %s:' % (s_indent, k))
+            print_ast(v, indent + 4, attribute_order)
+        elif isinstance(v, list):
+            print('%s %s:' % (s_indent, k))
+            for val in v:
+                if isinstance(val, Symbol):
+                    print_ast(val, indent + 4, attribute_order)
+                else:
+                    print('%s - %r' % (s_indent, val))
+        elif isinstance(v, dict):
+            print('%s %s:' % (s_indent, k))
+            for key in v:
+                val = v[key]
+                if isinstance(val, Symbol):
+                    print('%s [%s]' % (s_indent, key))
+                    print_ast(val, indent + 6, attribute_order)
+                else:
+                    print('%s [%s] %r' % (s_indent, key, val))
+
+    if attribute_order is not None:
+        for k in attribute_order:
+            if k in d:
+                print_attribute(k)
+    for k in sorted(d.keys()):
+        if k[0] != '_' and k not in ['type', 'range'] and (
+                attribute_order is None or k not in attribute_order):
+            print_attribute(k)
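print_ast renders a parse result as an indented tree: nonterminals print their type and range, tokens their value and source word, and public attributes other than type and range are recursed into, optionally in a caller-supplied order first. A hedged usage sketch; how the root symbol is obtained from a finished parser is an assumption here, not shown in this diff:

    root = parser.start[0]  # hypothetical handle on the start symbol
    print_ast(root, attribute_order=['lhs', 'op', 'rhs'])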
