@@ -8,9 +8,15 @@ class Symbol(object):
         assert isinstance(other, Symbol), other
         return self.is_term == other.is_term and self.name == other.name
 
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(self.name)
+
     def __repr__(self):
         return '%s(%r)' % (type(self).__name__, self.name)
 
 class Terminal(Symbol):
     is_term = True
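Defining __ne__ and __hash__ next to __eq__ is what lets these symbols serve as set members and dict keys: Python 2 does not derive __ne__ from __eq__, and in Python 3 overriding __eq__ alone sets __hash__ to None. A minimal sketch of the resulting value semantics (class names follow the diff; the demo itself is illustrative):

    # Stand-alone restatement of the symbol classes, for illustration only.
    class Symbol(object):
        is_term = NotImplemented

        def __init__(self, name):
            self.name = name

        def __eq__(self, other):
            assert isinstance(other, Symbol), other
            return self.is_term == other.is_term and self.name == other.name

        def __ne__(self, other):
            return not (self == other)

        def __hash__(self):
            return hash(self.name)

    class Terminal(Symbol):
        is_term = True

    class NonTerminal(Symbol):
        is_term = False

    # Independently constructed symbols now compare equal and collapse in
    # sets, so parser tables can be keyed by Terminal('NAME') from anywhere.
    assert Terminal('NAME') == Terminal('NAME')
    assert len({Terminal('NAME'), Terminal('NAME'), NonTerminal('expr')}) == 2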
@@ -3,7 +3,7 @@
 import re
 
 from .utils import Str, classify
-from .common import PatternStr, PatternRE, TokenDef
+from .common import is_terminal, PatternStr, PatternRE, TokenDef
 
 ###{standalone
 class LexError(Exception):
@@ -234,7 +234,7 @@ class ContextualLexer:
                 lexer = lexer_by_tokens[key]
             except KeyError:
                 accepts = set(accepts) | set(ignore) | set(always_accept)
-                state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END']
+                state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END']
                 lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks)
                 lexer_by_tokens[key] = lexer
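With this hunk the contextual lexer goes back to treating the per-state accepts sets as plain token names, so is_terminal (a string predicate) separates terminals from rule names. A hedged stand-in for the string convention of this era, where lark terminal names are uppercase and rule names lowercase; the helper below is illustrative, not lark's exact definition:

    # Hypothetical stand-in for the string-based is_terminal; illustrative only.
    def is_terminal(sym):
        return sym.isupper()

    accepts = {'NUMBER', 'PLUS', 'expr', '$END', 'WS'}
    state_tokens = sorted(n for n in accepts if is_terminal(n) and n != '$END')
    print(state_tokens)   # ['NUMBER', 'PLUS', 'WS'] -- rule names and $END drop out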
@@ -110,7 +110,7 @@ class ParseTreeBuilder:
     def _init_builders(self, rules):
         filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out}
         filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out}
-        assert all(t.filter_out for t in filter_out)
+        assert all(t.name.startswith('_') for t in filter_out)
 
         for rule in rules:
             options = rule.options
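The old assert was near-tautological: the set was built from the filter_out flags it then re-checked. The new one cross-checks those flags against lark's naming convention, where symbols to be dropped from the tree carry a leading underscore. A toy restatement of the invariant (Sym is a stand-in, not lark's class):

    class Sym(object):
        def __init__(self, name, filter_out=False):
            self.name, self.filter_out = name, filter_out

    filter_out = {Sym('_NEWLINE', True), Sym('_anon_helper', True)}
    assert all(t.name.startswith('_') for t in filter_out)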
@@ -7,7 +7,11 @@ from .lexer import Lexer, ContextualLexer, Token
 from .common import GrammarError
 from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
 from .tree import Tree
-from .grammar import Terminal
+from .grammar import Terminal, NonTerminal
+
+def terminals(seq):
+    # return [Terminal(t) for t in seq]
+    return seq
 
 class WithLexer:
     def init_traditional_lexer(self, lexer_conf):
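The terminals() helper is a transitional no-op: it marks every call site where plain token names will eventually be wrapped in Terminal objects (the commented-out body), while still forwarding strings so the string-based lexer keeps working. A sketch of the seam it creates; the as_symbols flag is illustrative, not part of the diff:

    class Terminal(object):                  # stand-in for lark.grammar.Terminal
        def __init__(self, name):
            self.name = name

    def terminals(seq, as_symbols=False):    # hypothetical flag, for illustration
        return [Terminal(t) for t in seq] if as_symbols else list(seq)

    print(terminals(['WS', 'COMMENT']))                    # ['WS', 'COMMENT']
    print([t.name for t in terminals(['WS'], True)])       # ['WS']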
@@ -18,7 +22,10 @@ class WithLexer:
         self.lexer_conf = lexer_conf
         states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
         always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
+        self.lexer = ContextualLexer(lexer_conf.tokens, states,
+                                     ignore=terminals(lexer_conf.ignore),
+                                     always_accept=terminals(always_accept),
+                                     user_callbacks=lexer_conf.callbacks)
 
     def lex(self, text):
         stream = self.lexer.lex(text)
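always_accept comes from an optional post-lexer: token names that every lexer state must accept regardless of what the parse table expects there. A rough shape of such a postlex (an indenter is the classic case in lark; the class below is a sketch, not lark's Indenter):

    class MyPostLex(object):           # hypothetical postlex, API shape only
        always_accept = ('_NEWLINE',)  # keep newlines in every lexer state

        def process(self, stream):
            for tok in stream:         # a real indenter would emit INDENT/DEDENT here
                yield tok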
@@ -74,7 +81,7 @@ class Earley_NoLex:
     def match(self, term, text, index=0):
-        return self.regexps[term].match(text, index)
+        return self.regexps[term.name].match(text, index)
 
     def _prepare_match(self, lexer_conf):
         self.regexps = {}
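self.regexps stays keyed by terminal name, so once term arrives as a Terminal object the lookup must go through .name. A self-contained sketch of that shape (stand-in class, illustrative regexps):

    import re

    class Terminal(object):            # stand-in for lark.grammar.Terminal
        def __init__(self, name):
            self.name = name

    regexps = {'NUMBER': re.compile(r'\d+')}
    term = Terminal('NUMBER')
    m = regexps[term.name].match('42 apples', 0)
    assert m is not None and m.group(0) == '42'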
@@ -8,47 +8,19 @@
 from collections import defaultdict
 import itertools
 
-from ..common import ParseError, is_terminal
+from ..common import ParseError
 from ..lexer import Token
 from ..tree import Tree
+from ..grammar import Terminal as T, NonTerminal as NT, Symbol
 
 try:
     xrange
 except NameError:
     xrange = range
 
-class Symbol(object):
-    """Any grammar symbol."""
-
-    def __init__(self, s):
-        self.s = s
-
-    def __repr__(self):
-        return '%s(%s)' % (type(self).__name__, str(self))
-
-    def __str__(self):
-        return str(self.s)
-
-    def __eq__(self, other):
-        return self.s == str(other)
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __hash__(self):
-        return hash((type(self), str(self.s)))
-
-class T(Symbol):
-    """Terminal."""
-
-    def match(self, s):
-        return self.s == s.type
-
-class NT(Symbol):
-    """Non-terminal."""
-    pass
+def match(t, s):
+    assert isinstance(t, T)
+    return t.name == s.type
 
 class Rule(object):
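The cyk module drops its private Symbol/T/NT hierarchy in favor of the shared classes from lark.grammar, and the old T.match method becomes the free function above: a terminal matches a token when the token's type equals the terminal's name. A minimal demo with stand-in classes (lark's Token is a str subclass carrying a .type attribute):

    class T(object):                       # stand-in terminal
        def __init__(self, name):
            self.name = name

    class Token(str):                      # stand-in for lark.lexer.Token
        def __new__(cls, type_, value):
            inst = super(Token, cls).__new__(cls, value)
            inst.type = type_
            return inst

    def match(t, s):
        assert isinstance(t, T)
        return t.name == s.type

    assert match(T('NUMBER'), Token('NUMBER', '42'))
    assert not match(T('NUMBER'), Token('NAME', 'x'))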
@@ -121,10 +93,12 @@ class Parser(object):
     def _to_rule(self, lark_rule):
         """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
+        assert isinstance(lark_rule.origin, NT)
+        assert all(isinstance(x, Symbol) for x in lark_rule.expansion)
         return Rule(
-            NT(lark_rule.origin), [
-                T(x) if is_terminal(x) else NT(x) for x in lark_rule.expansion
-            ], weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, alias=lark_rule.alias)
+            lark_rule.origin, lark_rule.expansion,
+            weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0,
+            alias=lark_rule.alias)
 
     def parse(self, tokenized): # pylint: disable=invalid-name
         """Parses input, which is a list of tokens."""
@@ -132,7 +106,7 @@ class Parser(object):
         # Check if the parse succeeded.
         if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]):
             raise ParseError('Parsing failed.')
-        parse = trees[(0, len(tokenized) - 1)][NT(self.start)]
+        parse = trees[(0, len(tokenized) - 1)][self.start]
         return self._to_tree(revert_cnf(parse))
 
     def _to_tree(self, rule_node):
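self.start is already a NonTerminal and, thanks to value-based hashing, indexes trees directly; re-wrapping with NT(...) would now double-wrap it. For orientation, the CYK tables are keyed by inclusive spans, so the whole n-token input lives at (0, n - 1); a toy shape of the success check:

    class R(object):                        # stand-in rule with an lhs
        def __init__(self, lhs):
            self.lhs = lhs

    start = 'start'
    tokenized = ['a', 'b', 'c']
    table = {(0, 2): {R('start'), R('expr')}}
    succeeded = not all(r.lhs != start for r in table[(0, len(tokenized) - 1)])
    assert succeeded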
@@ -143,8 +117,8 @@ class Parser(object):
             if isinstance(child, RuleNode):
                 children.append(self._to_tree(child))
             else:
-                assert isinstance(child.s, Token)
-                children.append(child.s)
+                assert isinstance(child.name, Token)
+                children.append(child.name)
         t = Tree(orig_rule.origin, children)
         t.rule=orig_rule
         return t
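Note the quirk this rename preserves: terminal leaves in the CYK forest wrap the matched Token itself as the symbol's payload, so after the move from .s to .name, child.name holds a Token instance rather than a string name. Sketch:

    class T(object):                        # stand-in terminal wrapper
        def __init__(self, name):
            self.name = name

    class Token(str):                       # stand-in token type
        pass

    leaf = T(Token('42'))                   # the leaf's "name" is the token itself
    assert isinstance(leaf.name, Token)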
@@ -169,7 +143,7 @@ def _parse(s, g):
     # Populate base case with existing terminal production rules
     for i, w in enumerate(s):
         for terminal, rules in g.terminal_rules.items():
-            if terminal.match(w):
+            if match(terminal, w):
                 for rule in rules:
                     table[(i, i)].add(rule)
                     if (rule.lhs not in trees[(i, i)] or
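Same mechanical change as above: matching moves from a method on the removed local T class to the module-level match. For context, this loop is the CYK base case, seeding every span (i, i) with the terminal rules matching the i-th token; a self-contained toy version:

    from collections import defaultdict

    class T(object):
        def __init__(self, name):
            self.name = name

    class Tok(str):
        def __new__(cls, type_, value):
            inst = super(Tok, cls).__new__(cls, value)
            inst.type = type_
            return inst

    def match(t, s):
        return t.name == s.type

    def base_case(tokens, terminal_rules):
        table = defaultdict(set)
        for i, w in enumerate(tokens):
            for terminal, rules in terminal_rules.items():
                if match(terminal, w):
                    table[(i, i)] |= set(rules)
        return table

    tbl = base_case([Tok('A', 'a'), Tok('B', 'b')],
                    {T('A'): {'a_rule'}, T('B'): {'b_rule'}})
    assert tbl[(0, 0)] == {'a_rule'} and tbl[(1, 1)] == {'b_rule'}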
@@ -349,13 +323,13 @@ def revert_cnf(node):
     if isinstance(node, T):
         return node
     # Reverts TERM rule.
-    if node.rule.lhs.s.startswith('__T_'):
+    if node.rule.lhs.name.startswith('__T_'):
         return node.children[0]
     else:
         children = []
         for child in map(revert_cnf, node.children):
             # Reverts BIN rule.
-            if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'):
+            if isinstance(child, RuleNode) and child.rule.lhs.name.startswith('__SP_'):
                 children += child.children
             else:
                 children.append(child)
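revert_cnf undoes the Chomsky-normal-form transform by name convention: '__T_' helpers (introduced to give terminals their own unit rules) collapse to their single child, and '__SP_' helpers (introduced by binarization) are spliced into their parent. The splice step in isolation (stand-in node type, illustrative names):

    class Node(object):                       # stand-in RuleNode
        def __init__(self, lhs_name, children=()):
            self.lhs_name = lhs_name
            self.children = list(children)

    def splice_bin_helpers(children):
        out = []
        for child in children:
            if isinstance(child, Node) and child.lhs_name.startswith('__SP_'):
                out += child.children         # hoist the helper's children
            else:
                out.append(child)
        return out

    helper = Node('__SP_expr_0', ['b', 'c'])
    assert splice_bin_helpers(['a', helper, 'd']) == ['a', 'b', 'c', 'd']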
@@ -98,14 +98,14 @@ class Parser:
         for item in to_scan:
             m = match(item.expect, stream, i)
             if m:
-                t = Token(item.expect, m.group(0), i, text_line, text_column)
+                t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                 delayed_matches[m.end()].append(item.advance(t))
 
                 s = m.group(0)
                 for j in range(1, len(s)):
                     m = match(item.expect, s[:-j])
                     if m:
-                        t = Token(item.expect, m.group(0), i, text_line, text_column)
+                        t = Token(item.expect.name, m.group(0), i, text_line, text_column)
                         delayed_matches[i+m.end()].append(item.advance(t))
 
         next_set = Column(i+1, self.FIRST, predict_all=self.predict_all)
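With item.expect now a Terminal object, the token's type must be the terminal's string name, hence .name at both construction sites. A stand-in mirroring the (type, value, pos, line, column) shape used here (assumed shape, not lark's exact Token class):

    class Token(str):                        # stand-in for lark.lexer.Token
        def __new__(cls, type_, value, pos=None, line=None, column=None):
            inst = super(Token, cls).__new__(cls, value)
            inst.type = type_
            inst.pos_in_stream = pos
            inst.line = line
            inst.column = column
            return inst

    class Terminal(object):
        def __init__(self, name):
            self.name = name

    expect = Terminal('NUMBER')
    t = Token(expect.name, '42', 10, 1, 11)
    assert t.type == 'NUMBER' and t == '42'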