| @@ -71,7 +71,7 @@ class Rule(Serialize): | |||||
| expansion : a list of symbols | expansion : a list of symbols | ||||
| order : index of this expansion amongst all rules of the same name | order : index of this expansion amongst all rules of the same name | ||||
| """ | """ | ||||
| __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp') | |||||
| __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') | |||||
| __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' | __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' | ||||
| __serialize_namespace__ = Terminal, NonTerminal, RuleOptions | __serialize_namespace__ = Terminal, NonTerminal, RuleOptions | ||||
| @@ -83,7 +83,6 @@ class Rule(Serialize): | |||||
| self.order = order | self.order = order | ||||
| self.options = options | self.options = options | ||||
| self._hash = hash((self.origin, tuple(self.expansion))) | self._hash = hash((self.origin, tuple(self.expansion))) | ||||
| self._rp = None | |||||
| def _deserialize(self): | def _deserialize(self): | ||||
| self._hash = hash((self.origin, tuple(self.expansion))) | self._hash = hash((self.origin, tuple(self.expansion))) | ||||
| @@ -3,20 +3,16 @@ from collections import Counter, defaultdict | |||||
| from ..utils import bfs, fzset, classify | from ..utils import bfs, fzset, classify | ||||
| from ..exceptions import GrammarError | from ..exceptions import GrammarError | ||||
| from ..grammar import Rule, Terminal, NonTerminal | from ..grammar import Rule, Terminal, NonTerminal | ||||
| import time | |||||
| # optimizations were made so that there should never be two distinct equal RulePtrs | |||||
| # to help with hashtable lookup | |||||
| class RulePtr(object): | class RulePtr(object): | ||||
| __slots__ = ('rule', 'index', '_advance') | |||||
| __slots__ = ('rule', 'index') | |||||
| def __init__(self, rule, index): | def __init__(self, rule, index): | ||||
| assert isinstance(rule, Rule) | assert isinstance(rule, Rule) | ||||
| assert index <= len(rule.expansion) | assert index <= len(rule.expansion) | ||||
| self.rule = rule | self.rule = rule | ||||
| self.index = index | self.index = index | ||||
| self._advance = None | |||||
| def __repr__(self): | def __repr__(self): | ||||
| before = [x.name for x in self.rule.expansion[:self.index]] | before = [x.name for x in self.rule.expansion[:self.index]] | ||||
| @@ -27,19 +23,19 @@ class RulePtr(object): | |||||
| def next(self): | def next(self): | ||||
| return self.rule.expansion[self.index] | return self.rule.expansion[self.index] | ||||
| # don't create duplicate RulePtrs | |||||
| def advance(self, sym): | def advance(self, sym): | ||||
| assert self.next == sym | assert self.next == sym | ||||
| a = self._advance | |||||
| if a is None: | |||||
| a = RulePtr(self.rule, self.index + 1) | |||||
| self._advance = a | |||||
| return a | |||||
| return RulePtr(self.rule, self.index+1) | |||||
| @property | @property | ||||
| def is_satisfied(self): | def is_satisfied(self): | ||||
| return self.index == len(self.rule.expansion) | return self.index == len(self.rule.expansion) | ||||
| def __eq__(self, other): | |||||
| return self.rule == other.rule and self.index == other.index | |||||
| def __hash__(self): | |||||
| return hash((self.rule, self.index)) | |||||
| # state generation ensures no duplicate LR0ItemSets | # state generation ensures no duplicate LR0ItemSets | ||||
| class LR0ItemSet(object): | class LR0ItemSet(object): | ||||
| @@ -159,19 +155,11 @@ class GrammarAnalyzer(object): | |||||
| self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) | self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) | ||||
| # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) | # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) | ||||
| for root_rule in lr0_root_rules.values(): | |||||
| root_rule._rp = RulePtr(root_rule, 0) | |||||
| self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) | |||||
| self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) | |||||
| for start, root_rule in lr0_root_rules.items()} | for start, root_rule in lr0_root_rules.items()} | ||||
| self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) | self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) | ||||
| self.nonterminal_transitions = [] | |||||
| self.directly_reads = defaultdict(set) | |||||
| self.reads = defaultdict(set) | |||||
| self.includes = defaultdict(set) | |||||
| self.lookback = defaultdict(set) | |||||
| def expand_rule(self, source_rule, rules_by_origin=None): | def expand_rule(self, source_rule, rules_by_origin=None): | ||||
| "Returns all init_ptrs accessible by rule (recursive)" | "Returns all init_ptrs accessible by rule (recursive)" | ||||
| @@ -183,11 +171,7 @@ class GrammarAnalyzer(object): | |||||
| assert not rule.is_term, rule | assert not rule.is_term, rule | ||||
| for r in rules_by_origin[rule]: | for r in rules_by_origin[rule]: | ||||
| # don't create duplicate RulePtr objects | |||||
| init_ptr = r._rp | |||||
| if init_ptr is None: | |||||
| init_ptr = RulePtr(r, 0) | |||||
| r._rp = init_ptr | |||||
| init_ptr = RulePtr(r, 0) | |||||
| init_ptrs.add(init_ptr) | init_ptrs.add(init_ptr) | ||||
| if r.expansion: # if not empty rule | if r.expansion: # if not empty rule | ||||
| @@ -15,8 +15,6 @@ from ..exceptions import GrammarError | |||||
| from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet | from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet | ||||
| from ..grammar import Rule | from ..grammar import Rule | ||||
| import time | |||||
| ###{standalone | ###{standalone | ||||
| class Action: | class Action: | ||||
| @@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F): | |||||
| S.append(x) | S.append(x) | ||||
| d = len(S) | d = len(S) | ||||
| N[x] = d | N[x] = d | ||||
| F[x] = G(x) | |||||
| for y in R(x): | |||||
| F[x] = G[x] | |||||
| for y in R[x]: | |||||
| if N[y] == 0: | if N[y] == 0: | ||||
| traverse(y, S, N, X, R, G, F) | traverse(y, S, N, X, R, G, F) | ||||
| n_x = N[x] | n_x = N[x] | ||||
| @@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F): | |||||
| class LALR_Analyzer(GrammarAnalyzer): | class LALR_Analyzer(GrammarAnalyzer): | ||||
| def __init__(self, parser_conf, debug=False): | |||||
| GrammarAnalyzer.__init__(self, parser_conf, debug) | |||||
| self.nonterminal_transitions = [] | |||||
| self.directly_reads = defaultdict(set) | |||||
| self.reads = defaultdict(set) | |||||
| self.includes = defaultdict(set) | |||||
| self.lookback = defaultdict(set) | |||||
| def compute_lr0_states(self): | def compute_lr0_states(self): | ||||
| self.states = set() | |||||
| self.lr0_states = set() | |||||
| # map of kernels to LR0ItemSets | # map of kernels to LR0ItemSets | ||||
| cache = {} | cache = {} | ||||
| @@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| state.transitions[sym] = new_state | state.transitions[sym] = new_state | ||||
| yield new_state | yield new_state | ||||
| self.states.add(state) | |||||
| self.lr0_states.add(state) | |||||
| for _ in bfs(self.lr0_start_states.values(), step): | for _ in bfs(self.lr0_start_states.values(), step): | ||||
| pass | pass | ||||
| @@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| assert(rp.index == 0) | assert(rp.index == 0) | ||||
| self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) | self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) | ||||
| for state in self.states: | |||||
| for state in self.lr0_states: | |||||
| seen = set() | seen = set() | ||||
| for rp in state.closure: | for rp in state.closure: | ||||
| if rp.is_satisfied: | if rp.is_satisfied: | ||||
| continue | continue | ||||
| s = rp.next | s = rp.next | ||||
| # if s is a not a nonterminal | # if s is a not a nonterminal | ||||
| if not s in self.lr0_rules_by_origin: | |||||
| if s not in self.lr0_rules_by_origin: | |||||
| continue | continue | ||||
| if s in seen: | if s in seen: | ||||
| continue | continue | ||||
| @@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| if s2 in self.NULLABLE: | if s2 in self.NULLABLE: | ||||
| r.add((next_state, s2)) | r.add((next_state, s2)) | ||||
| def compute_read_sets(self): | |||||
| R = lambda nt: self.reads[nt] | |||||
| G = lambda nt: self.directly_reads[nt] | |||||
| self.read_sets = digraph(self.nonterminal_transitions, R, G) | |||||
| def compute_includes_lookback(self): | def compute_includes_lookback(self): | ||||
| for nt in self.nonterminal_transitions: | for nt in self.nonterminal_transitions: | ||||
| state, nonterminal = nt | state, nonterminal = nt | ||||
| @@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| s = rp.rule.expansion[i] | s = rp.rule.expansion[i] | ||||
| nt2 = (state2, s) | nt2 = (state2, s) | ||||
| state2 = state2.transitions[s] | state2 = state2.transitions[s] | ||||
| if not nt2 in self.reads: | |||||
| if nt2 not in self.reads: | |||||
| continue | continue | ||||
| j = i + 1 | |||||
| for j in range(i + 1, len(rp.rule.expansion)): | for j in range(i + 1, len(rp.rule.expansion)): | ||||
| if not rp.rule.expansion[j] in self.NULLABLE: | if not rp.rule.expansion[j] in self.NULLABLE: | ||||
| break | break | ||||
| @@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| for nt2 in includes: | for nt2 in includes: | ||||
| self.includes[nt2].add(nt) | self.includes[nt2].add(nt) | ||||
| def compute_follow_sets(self): | |||||
| R = lambda nt: self.includes[nt] | |||||
| G = lambda nt: self.read_sets[nt] | |||||
| self.follow_sets = digraph(self.nonterminal_transitions, R, G) | |||||
| def compute_lookaheads(self): | def compute_lookaheads(self): | ||||
| read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads) | |||||
| follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets) | |||||
| for nt, lookbacks in self.lookback.items(): | for nt, lookbacks in self.lookback.items(): | ||||
| for state, rule in lookbacks: | for state, rule in lookbacks: | ||||
| for s in self.follow_sets[nt]: | |||||
| for s in follow_sets[nt]: | |||||
| state.lookaheads[s].add(rule) | state.lookaheads[s].add(rule) | ||||
| def compute_lalr1_states(self): | def compute_lalr1_states(self): | ||||
| m = {} | m = {} | ||||
| for state in self.states: | |||||
| for state in self.lr0_states: | |||||
| actions = {} | actions = {} | ||||
| for la, next_state in state.transitions.items(): | for la, next_state in state.transitions.items(): | ||||
| actions[la] = (Shift, next_state.closure) | actions[la] = (Shift, next_state.closure) | ||||
| @@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
| self.parse_table = self._parse_table | self.parse_table = self._parse_table | ||||
| else: | else: | ||||
| self.parse_table = IntParseTable.from_ParseTable(self._parse_table) | self.parse_table = IntParseTable.from_ParseTable(self._parse_table) | ||||
| def compute_lalr(self): | |||||
| self.compute_lr0_states() | |||||
| self.compute_reads_relations() | |||||
| self.compute_includes_lookback() | |||||
| self.compute_lookaheads() | |||||
| self.compute_lalr1_states() | |||||
| @@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize | |||||
| from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | ||||
| import time | |||||
| ###{standalone | ###{standalone | ||||
| class LALR_Parser(object): | class LALR_Parser(object): | ||||
| @@ -17,13 +15,7 @@ class LALR_Parser(object): | |||||
| assert all(r.options is None or r.options.priority is None | assert all(r.options is None or r.options.priority is None | ||||
| for r in parser_conf.rules), "LALR doesn't yet support prioritization" | for r in parser_conf.rules), "LALR doesn't yet support prioritization" | ||||
| analysis = LALR_Analyzer(parser_conf, debug=debug) | analysis = LALR_Analyzer(parser_conf, debug=debug) | ||||
| analysis.compute_lr0_states() | |||||
| analysis.compute_reads_relations() | |||||
| analysis.compute_read_sets() | |||||
| analysis.compute_includes_lookback() | |||||
| analysis.compute_follow_sets() | |||||
| analysis.compute_lookaheads() | |||||
| analysis.compute_lalr1_states() | |||||
| analysis.compute_lalr() | |||||
| callbacks = parser_conf.callbacks | callbacks = parser_conf.callbacks | ||||
| self._parse_table = analysis.parse_table | self._parse_table = analysis.parse_table | ||||
| @@ -88,11 +80,6 @@ class _Parser: | |||||
| state_stack.append(new_state) | state_stack.append(new_state) | ||||
| value_stack.append(value) | value_stack.append(value) | ||||
| if state_stack[-1] == end_state: | |||||
| return True | |||||
| return False | |||||
| # Main LALR-parser loop | # Main LALR-parser loop | ||||
| for token in stream: | for token in stream: | ||||
| while True: | while True: | ||||
| @@ -111,7 +98,8 @@ class _Parser: | |||||
| while True: | while True: | ||||
| _action, arg = get_action(token) | _action, arg = get_action(token) | ||||
| assert(_action is Reduce) | assert(_action is Reduce) | ||||
| if reduce(arg): | |||||
| reduce(arg) | |||||
| if state_stack[-1] == end_state: | |||||
| return value_stack[-1] | return value_stack[-1] | ||||
| ###} | ###} | ||||