| @@ -0,0 +1,64 @@ | |||||
| import sys | |||||
| from lark.lark import Lark, inline_args | |||||
| from lark.tree import Transformer | |||||
| json_grammar = r""" | |||||
| ?start: value | |||||
| ?value: object | |||||
| | array | |||||
| | string | |||||
| | number | |||||
| | "true" -> true | |||||
| | "false" -> false | |||||
| | "null" -> null | |||||
| array : "[" [value ("," value)*] "]" | |||||
| object : "{" [pair ("," pair)*] "}" | |||||
| pair : string ":" value | |||||
| number : /-?\d+(\.\d+)?([eE][+-]?\d+)?/ | |||||
| string : /".*?(?<!\\)"/ | |||||
| WS.ignore.newline: /[ \t\n]+/ | |||||
| """ | |||||
| class TreeToJson(Transformer): | |||||
| @inline_args | |||||
| def string(self, s): | |||||
| return s[1:-1] | |||||
| array = list | |||||
| pair = tuple | |||||
| object = dict | |||||
| number = inline_args(float) | |||||
| null = lambda self, _: None | |||||
| true = lambda self, _: True | |||||
| false = lambda self, _: False | |||||
| json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||||
| parse = json_parser.parse | |||||
| def test(): | |||||
| test_json = ''' | |||||
| { | |||||
| "empty_object" : {}, | |||||
| "empty_array" : [], | |||||
| "booleans" : { "YES" : true, "NO" : false }, | |||||
| "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], | |||||
| "strings" : [ "This", [ "And" , "That" ] ], | |||||
| "nothing" : null | |||||
| } | |||||
| ''' | |||||
| j = parse(test_json) | |||||
| print j | |||||
| import json | |||||
| assert j == json.loads(test_json) | |||||
| if __name__ == '__main__': | |||||
| test() | |||||
| with open(sys.argv[1]) as f: | |||||
| print parse(f.read()) | |||||
| @@ -0,0 +1,7 @@ | |||||
| class GrammarError(Exception): | |||||
| pass | |||||
| def is_terminal(sym): | |||||
| return sym.isupper() or sym[0] == '$' | |||||
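A quick illustration of the new is_terminal helper (the symbol names here are just examples): a symbol is treated as a terminal if it is written in ALL-CAPS or starts with '$'; anything else is taken to be a rule name.

from lark.common import is_terminal

assert is_terminal('NUMBER')        # ALL-CAPS token names are terminals
assert is_terminal('$end')          # internal symbols like $end / $root as well
assert not is_terminal('expansion') # lowercase names are rules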
| @@ -1,14 +1,9 @@ | |||||
| from collections import defaultdict, deque | from collections import defaultdict, deque | ||||
| from utils import classify, classify_bool, bfs, fzset | from utils import classify, classify_bool, bfs, fzset | ||||
| from common import GrammarError, is_terminal | |||||
| ACTION_SHIFT = 0 | ACTION_SHIFT = 0 | ||||
| class GrammarError(Exception): | |||||
| pass | |||||
| def is_terminal(sym): | |||||
| return sym.isupper() or sym[0] == '$' | |||||
| class Rule(object): | class Rule(object): | ||||
| """ | """ | ||||
| origin : a symbol | origin : a symbol | ||||
| @@ -61,9 +56,10 @@ def update_set(set1, set2): | |||||
| return set1 != copy | return set1 != copy | ||||
| class GrammarAnalyzer(object): | class GrammarAnalyzer(object): | ||||
| def __init__(self, rule_tuples): | |||||
| def __init__(self, rule_tuples, start_symbol): | |||||
| self.start_symbol = start_symbol | |||||
| rule_tuples = list(rule_tuples) | rule_tuples = list(rule_tuples) | ||||
| rule_tuples.append(('$root', ['start', '$end'])) | |||||
| rule_tuples.append(('$root', [start_symbol, '$end'])) | |||||
| rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] | rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples] | ||||
| self.rules = set() | self.rules = set() | ||||
| @@ -78,7 +74,7 @@ class GrammarAnalyzer(object): | |||||
| if not (is_terminal(sym) or sym in self.rules_by_origin): | if not (is_terminal(sym) or sym in self.rules_by_origin): | ||||
| raise GrammarError("Using an undefined rule: %s" % sym) | raise GrammarError("Using an undefined rule: %s" % sym) | ||||
| self.init_state = self.expand_rule('start') | |||||
| self.init_state = self.expand_rule(start_symbol) | |||||
| def expand_rule(self, rule): | def expand_rule(self, rule): | ||||
| "Returns all init_ptrs accessible by rule (recursive)" | "Returns all init_ptrs accessible by rule (recursive)" | ||||
| @@ -7,8 +7,8 @@ from .load_grammar import load_grammar | |||||
| from .tree import Tree, Transformer | from .tree import Tree, Transformer | ||||
| from .lexer import Lexer | from .lexer import Lexer | ||||
| from .grammar_analysis import GrammarAnalyzer, is_terminal | |||||
| from . import parser, earley | |||||
| from .parse_tree_builder import ParseTreeBuilder | |||||
| from .parser_frontends import ENGINE_DICT | |||||
| class LarkOptions(object): | class LarkOptions(object): | ||||
| """Specifies the options for Lark | """Specifies the options for Lark | ||||
| @@ -23,6 +23,7 @@ class LarkOptions(object): | |||||
| keep_all_tokens - Don't automagically remove "punctuation" tokens (default: True) | keep_all_tokens - Don't automagically remove "punctuation" tokens (default: True) | ||||
| cache_grammar - Cache the Lark grammar (Default: False) | cache_grammar - Cache the Lark grammar (Default: False) | ||||
| postlex - Lexer post-processing (Default: None) | postlex - Lexer post-processing (Default: None) | ||||
| start - The start symbol (Default: start) | |||||
| """ | """ | ||||
| __doc__ += OPTIONS_DOC | __doc__ += OPTIONS_DOC | ||||
| def __init__(self, options_dict): | def __init__(self, options_dict): | ||||
| @@ -36,6 +37,7 @@ class LarkOptions(object): | |||||
| self.postlex = o.pop('postlex', None) | self.postlex = o.pop('postlex', None) | ||||
| self.parser = o.pop('parser', 'earley') | self.parser = o.pop('parser', 'earley') | ||||
| self.transformer = o.pop('transformer', None) | self.transformer = o.pop('transformer', None) | ||||
| self.start = o.pop('start', 'start') | |||||
| assert self.parser in ENGINE_DICT | assert self.parser in ENGINE_DICT | ||||
| if self.parser == 'earley' and self.transformer: | if self.parser == 'earley' and self.transformer: | ||||
| @@ -47,71 +49,8 @@ class LarkOptions(object): | |||||
| raise ValueError("Unknown options: %s" % o.keys()) | raise ValueError("Unknown options: %s" % o.keys()) | ||||
| class Callback(object): | |||||
| pass | |||||
| class RuleTreeToText(Transformer): | |||||
| def expansions(self, x): | |||||
| return x | |||||
| def expansion(self, symbols): | |||||
| return [sym.value for sym in symbols], None | |||||
| def alias(self, ((expansion, _alias), alias)): | |||||
| assert _alias is None, (alias, expansion, '-', _alias) | |||||
| return expansion, alias.value | |||||
| def create_rule_handler(expansion, usermethod): | |||||
| to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion) | |||||
| if not (is_terminal(sym) and sym.startswith('_'))] | |||||
| def _build_ast(match): | |||||
| children = [] | |||||
| for i, to_expand in to_include: | |||||
| if to_expand: | |||||
| children += match[i].children | |||||
| else: | |||||
| children.append(match[i]) | |||||
| return usermethod(children) | |||||
| return _build_ast | |||||
| def create_expand1_tree_builder_function(tree_builder): | |||||
| def f(children): | |||||
| if len(children) == 1: | |||||
| return children[0] | |||||
| else: | |||||
| return tree_builder(children) | |||||
| return f | |||||
| class LALR: | |||||
| def build_parser(self, rules, callback): | |||||
| ga = GrammarAnalyzer(rules) | |||||
| ga.analyze() | |||||
| return parser.Parser(ga, callback) | |||||
| class Earley: | |||||
| @staticmethod | |||||
| def _process_expansion(x): | |||||
| return [{'literal': s} if is_terminal(s) else s for s in x] | |||||
| def build_parser(self, rules, callback): | |||||
| rules = [{'name':n, 'symbols': self._process_expansion(x), 'postprocess':getattr(callback, a)} for n,x,a in rules] | |||||
| return EarleyParser(earley.Parser(rules, 'start')) | |||||
| class EarleyParser: | |||||
| def __init__(self, parser): | |||||
| self.parser = parser | |||||
| def parse(self, text): | |||||
| res = self.parser.parse(text) | |||||
| assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | |||||
| return res[0] | |||||
| ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||||
| class Lark: | class Lark: | ||||
| def __init__(self, grammar, **options): | def __init__(self, grammar, **options): | ||||
| """ | """ | ||||
| @@ -147,6 +86,7 @@ class Lark: | |||||
| self.lexer = self._build_lexer() | self.lexer = self._build_lexer() | ||||
| if not self.options.only_lex: | if not self.options.only_lex: | ||||
| self.parser_engine = ENGINE_DICT[self.options.parser]() | self.parser_engine = ENGINE_DICT[self.options.parser]() | ||||
| self.parse_tree_builder = ParseTreeBuilder(self.options.tree_class) | |||||
| self.parser = self._build_parser() | self.parser = self._build_parser() | ||||
| def _build_lexer(self): | def _build_lexer(self): | ||||
| @@ -160,50 +100,12 @@ class Lark: | |||||
| def _build_parser(self): | def _build_parser(self): | ||||
| transformer = self.options.transformer | |||||
| callback = Callback() | |||||
| rules = [] | |||||
| rule_tree_to_text = RuleTreeToText() | |||||
| for origin, tree in self.rules.items(): | |||||
| for expansion, alias in rule_tree_to_text.transform(tree): | |||||
| if alias and origin.startswith('_'): | |||||
| raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin) | |||||
| expand1 = origin.startswith('?') | |||||
| _origin = origin.lstrip('?*') | |||||
| if alias: | |||||
| alias = alias.lstrip('*') | |||||
| _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | |||||
| try: | |||||
| f = transformer._get_func(alias or _origin) | |||||
| # f = getattr(transformer, alias or _origin) | |||||
| except AttributeError: | |||||
| if alias: | |||||
| f = self._create_tree_builder_function(alias) | |||||
| else: | |||||
| f = self._create_tree_builder_function(_origin) | |||||
| if expand1: | |||||
| f = create_expand1_tree_builder_function(f) | |||||
| alias_handler = create_rule_handler(expansion, f) | |||||
| assert not hasattr(callback, _alias) | |||||
| setattr(callback, _alias, alias_handler) | |||||
| rules.append((_origin, expansion, _alias)) | |||||
| return self.parser_engine.build_parser(rules, callback) | |||||
| rules, callback = self.parse_tree_builder.create_tree_builder(self.rules, self.options.transformer) | |||||
| return self.parser_engine.build_parser(rules, callback, self.options.start) | |||||
| __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC | __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC | ||||
| def _create_tree_builder_function(self, name): | |||||
| tree_class = self.options.tree_class | |||||
| def f(children): | |||||
| return tree_class(name, children) | |||||
| return f | |||||
| def lex(self, text): | def lex(self, text): | ||||
| stream = self.lexer.lex(text) | stream = self.lexer.lex(text) | ||||
| if self.options.postlex: | if self.options.postlex: | ||||
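The practical effect of the new option is that the entry rule no longer has to be named 'start'. A usage sketch, mirroring test_start from the test suite further down:

from lark.lark import Lark

g = Lark("""a: "a" a? """, parser='lalr', start='a')
g.parse('aaa')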
| @@ -1,16 +1,18 @@ | |||||
| import re | import re | ||||
| import codecs | import codecs | ||||
from lexer import Lexer, Token
from grammar_analysis import GrammarAnalyzer
from parser import Parser
from tree import Tree as T, Transformer, InlineTransformer, Visitor
from .lexer import Lexer, Token
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR
from .common import is_terminal, GrammarError
from .tree import Tree as T, Transformer, InlineTransformer, Visitor
| unicode_escape = codecs.getdecoder('unicode_escape') | unicode_escape = codecs.getdecoder('unicode_escape') | ||||
| _TOKEN_NAMES = { | _TOKEN_NAMES = { | ||||
| ':' : 'COLON', | |||||
| ':' : '_COLON', | |||||
| ',' : 'COMMA', | ',' : 'COMMA', | ||||
| ';' : 'SEMICOLON', | ';' : 'SEMICOLON', | ||||
| '+' : 'PLUS', | '+' : 'PLUS', | ||||
| @@ -26,7 +28,7 @@ _TOKEN_NAMES = { | |||||
| '<' : 'LESSTHAN', | '<' : 'LESSTHAN', | ||||
| '>' : 'MORETHAN', | '>' : 'MORETHAN', | ||||
| '=' : 'EQUAL', | '=' : 'EQUAL', | ||||
| '.' : 'DOT', | |||||
| '.' : '_DOT', | |||||
| '%' : 'PERCENT', | '%' : 'PERCENT', | ||||
| '`' : 'BACKQUOTE', | '`' : 'BACKQUOTE', | ||||
| '^' : 'CIRCUMFLEX', | '^' : 'CIRCUMFLEX', | ||||
| @@ -34,8 +36,8 @@ _TOKEN_NAMES = { | |||||
| '\'' : 'QUOTE', | '\'' : 'QUOTE', | ||||
| '~' : 'TILDE', | '~' : 'TILDE', | ||||
| '@' : 'AT', | '@' : 'AT', | ||||
| '(' : 'LPAR', | |||||
| ')' : 'RPAR', | |||||
| '(' : '_LPAR', | |||||
| ')' : '_RPAR', | |||||
| '{' : 'LBRACE', | '{' : 'LBRACE', | ||||
| '}' : 'RBRACE', | '}' : 'RBRACE', | ||||
| '[' : 'LSQB', | '[' : 'LSQB', | ||||
| @@ -44,151 +46,58 @@ _TOKEN_NAMES = { | |||||
| # Grammar Parser | # Grammar Parser | ||||
| TOKENS = { | TOKENS = { | ||||
| 'LPAR': '\(', | |||||
| 'RPAR': '\)', | |||||
| 'LBRA': '\[', | |||||
| 'RBRA': '\]', | |||||
| '_LPAR': '\(', | |||||
| '_RPAR': '\)', | |||||
| '_LBRA': '\[', | |||||
| '_RBRA': '\]', | |||||
| 'OP': '[+*?]', | 'OP': '[+*?]', | ||||
| 'COLON': ':', | |||||
| 'OR': '\|', | |||||
| 'DOT': '\.', | |||||
| '_COLON': ':', | |||||
| '_OR': '\|', | |||||
| '_DOT': '\.', | |||||
| 'RULE': '[_?*]?[a-z][_a-z0-9]*', | 'RULE': '[_?*]?[a-z][_a-z0-9]*', | ||||
| 'TOKEN': '_?[A-Z][_A-Z0-9]*', | 'TOKEN': '_?[A-Z][_A-Z0-9]*', | ||||
| 'STRING': r'".*?[^\\]"', | 'STRING': r'".*?[^\\]"', | ||||
| 'REGEXP': r"/(.|\n)*?[^\\]/", | 'REGEXP': r"/(.|\n)*?[^\\]/", | ||||
| 'NL': r'(\r?\n)+\s*', | |||||
| '_NL': r'(\r?\n)+\s*', | |||||
| 'WS': r'[ \t]+', | 'WS': r'[ \t]+', | ||||
| 'COMMENT': r'//[^\n]*\n', | 'COMMENT': r'//[^\n]*\n', | ||||
| 'TO': '->' | |||||
| '_TO': '->' | |||||
| } | } | ||||
RULES = [
    ('start', ['list']),
    ('list', ['item']),
    ('list', ['list', 'item']),
    ('item', ['rule']),
    ('item', ['token']),
    ('item', ['NL']),

    ('rule', ['RULE', 'COLON', 'expansions', 'NL']),
    ('expansions', ['expansion']),
    ('expansions', ['expansions', 'OR', 'expansion']),
    ('expansions', ['expansions', 'NL', 'OR', 'expansion']),

    ('expansion', ['_expansion']),
    ('expansion', ['_expansion', 'TO', 'RULE']),

    ('_expansion', []),
    ('_expansion', ['_expansion', 'expr']),

    ('expr', ['atom']),
    ('expr', ['atom', 'OP']),

    ('atom', ['LPAR', 'expansions', 'RPAR']),
    ('atom', ['maybe']),
    ('atom', ['RULE']),
    ('atom', ['TOKEN']),
    ('atom', ['anontoken']),

    ('anontoken', ['tokenvalue']),

    ('maybe', ['LBRA', 'expansions', 'RBRA']),

    ('token', ['TOKEN', 'COLON', 'tokenvalue', 'NL']),
    ('token', ['TOKEN', 'tokenmods', 'COLON', 'tokenvalue', 'NL']),
    ('tokenvalue', ['REGEXP']),
    ('tokenvalue', ['STRING']),
    ('tokenmods', ['DOT', 'RULE']),
    ('tokenmods', ['tokenmods', 'DOT', 'RULE']),
]

class SaveDefinitions(object):
    def __init__(self):
        self.rules = {}
        self.token_set = set()
        self.tokens = []
        self.i = 0

    def atom__3(self, _1, value, _2):
        return value
    def atom__1(self, value):
        return value

    def expr__1(self, expr):
        return expr
    def expr(self, *x):
        return T('expr', x)

    def expansion__1(self, expansion):
        return expansion
    def expansion__3(self, expansion, _, alias):
        return T('alias', [expansion, alias])
    def _expansion(self, *x):
        return T('expansion', x)

    def expansions(self, *x):
        items = [i for i in x if isinstance(i, T)]
        return T('expansions', items)

    def maybe(self, _1, expr, _2):
        return T('expr', [expr, Token('OP', '?', -1)])

    def rule(self, name, _1, expansion, _2):
        name = name.value
        if name in self.rules:
            raise ValueError("Rule '%s' defined more than once" % name)
        self.rules[name] = expansion

    def token(self, *x):
        name = x[0].value
        if name in self.token_set:
            raise ValueError("Token '%s' defined more than once" % name)
        self.token_set.add(name)
        if len(x) == 4:
            self.tokens.append((name, x[2], []))
        else:
            self.tokens.append((name, x[3], x[1].children))

    def tokenvalue(self, tokenvalue):
        return tokenvalue

    def anontoken(self, token):
        if token.type == 'STRING':
            value = token.value[1:-1]
            try:
                token_name = _TOKEN_NAMES[value]
            except KeyError:
                if value.isalnum() and value[0].isalpha():
                    token_name = value.upper()
                else:
                    token_name = 'ANONSTR_%d' % self.i
                    self.i += 1
                token_name = '__' + token_name
        elif token.type == 'REGEXP':
            token_name = 'ANONRE_%d' % self.i
            self.i += 1
        else:
            assert False, x

        if token_name not in self.token_set:
            self.token_set.add(token_name)
            self.tokens.append((token_name, token, []))

        return Token('TOKEN', token_name, -1)

    def tokenmods__2(self, _, rule):
        return T('tokenmods', [rule.value])
    def tokenmods__3(self, tokenmods, _, rule):
        return T('tokenmods', tokenmods.children + [rule.value])

    def start(self, *x): pass
    def list(self, *x): pass
    def item(self, *x): pass

RULES = {
    'start': ['list'],
    'list': ['item', 'list item'],
    'item': ['rule', 'token', '_NL'],

    'rule': ['RULE _COLON expansions _NL'],
    'expansions': ['expansion',
                   'expansions _OR expansion',
                   'expansions _NL _OR expansion'],

    'expansion': ['_expansion',
                  '_expansion _TO RULE'],

    '_expansion': ['', '_expansion expr'],

    '?expr': ['atom',
              'atom OP'],

    '?atom': ['_LPAR expansions _RPAR',
              'maybe',
              'RULE',
              'TOKEN',
              'anontoken'],

    'anontoken': ['tokenvalue'],

    'maybe': ['_LBRA expansions _RBRA'],

    'token': ['TOKEN _COLON tokenvalue _NL',
              'TOKEN tokenmods _COLON tokenvalue _NL'],

    '?tokenvalue': ['REGEXP', 'STRING'],
    'tokenmods': ['_DOT RULE', 'tokenmods _DOT RULE'],
}
| class EBNF_to_BNF(InlineTransformer): | class EBNF_to_BNF(InlineTransformer): | ||||
| @@ -281,46 +190,110 @@ def dict_update_safe(d1, d2): | |||||
| d1[k] = v | d1[k] = v | ||||
| def generate_aliases(): | |||||
| sd = SaveDefinitions() | |||||
| for name, expansion in RULES: | |||||
| try: | |||||
| f = getattr(sd, "%s__%s" % (name, len(expansion))) | |||||
| except AttributeError: | |||||
| f = getattr(sd, name) | |||||
| yield name, expansion, f.__name__ | |||||
| class RuleTreeToText(Transformer): | |||||
| def expansions(self, x): | |||||
| return x | |||||
| def expansion(self, symbols): | |||||
| return [sym.value for sym in symbols], None | |||||
| def alias(self, ((expansion, _alias), alias)): | |||||
| assert _alias is None, (alias, expansion, '-', _alias) | |||||
| return expansion, alias.value | |||||
| class SimplifyTree(InlineTransformer): | |||||
| def maybe(self, expr): | |||||
| return T('expr', [expr, Token('OP', '?', -1)]) | |||||
| def tokenmods(self, *args): | |||||
| if len(args) == 1: | |||||
| return list(args) | |||||
| tokenmods, value = args | |||||
| return tokenmods + [value] | |||||
| def get_tokens(tree, token_set): | |||||
| tokens = [] | |||||
| for t in tree.find_data('token'): | |||||
| x = t.children | |||||
| name = x[0].value | |||||
| assert not name.startswith('__'), 'Names starting with double-underscore are reserved (Error at %s)' % name | |||||
| if name in token_set: | |||||
| raise ValueError("Token '%s' defined more than once" % name) | |||||
| token_set.add(name) | |||||
| if len(x) == 2: | |||||
| yield name, x[1], [] | |||||
| else: | |||||
| assert len(x) == 3 | |||||
| yield name, x[2], x[1] | |||||
| class ExtractAnonTokens(InlineTransformer): | |||||
| def __init__(self, tokens, token_set): | |||||
self.tokens = tokens
self.token_set = token_set
self.token_reverse = {value[1:-1]: name for name, value, _flags in tokens}
self.i = 0    # running index used to number anonymous tokens below
| def anontoken(self, token): | |||||
| if token.type == 'STRING': | |||||
| value = token.value[1:-1] | |||||
| try: | |||||
| # If already defined, use the user-defined token name | |||||
| token_name = self.token_reverse[value] | |||||
| except KeyError: | |||||
| # Try to assign an indicative anon-token name, otherwise use a numbered name | |||||
| try: | |||||
| token_name = _TOKEN_NAMES[value] | |||||
| except KeyError: | |||||
| if value.isalnum() and value[0].isalpha(): | |||||
| token_name = value.upper() | |||||
| else: | |||||
| token_name = 'ANONSTR_%d' % self.i | |||||
| self.i += 1 | |||||
| token_name = '__' + token_name | |||||
| elif token.type == 'REGEXP': | |||||
| token_name = 'ANONRE_%d' % self.i | |||||
| self.i += 1 | |||||
| else: | |||||
assert False, token
| if token_name not in self.token_set: | |||||
| self.token_set.add(token_name) | |||||
| self.tokens.append((token_name, token, [])) | |||||
| return Token('TOKEN', token_name, -1) | |||||
| def inline_args(f): | |||||
| def _f(self, args): | |||||
| return f(*args) | |||||
| return _f | |||||
| class GrammarLoader: | class GrammarLoader: | ||||
| def __init__(self): | def __init__(self): | ||||
| self.rules = list(generate_aliases()) | |||||
| self.ga = GrammarAnalyzer(self.rules) | |||||
| self.ga.analyze() | |||||
| self.lexer = Lexer(TOKENS.items(), {}, ignore=['WS', 'COMMENT']) | self.lexer = Lexer(TOKENS.items(), {}, ignore=['WS', 'COMMENT']) | ||||
| self.simplify_rule = SimplifyRule_Visitor() | |||||
| def _generate_parser_callbacks(self, callbacks): | |||||
| d = {alias: inline_args(getattr(callbacks, alias)) | |||||
| for _n, _x, alias in self.rules} | |||||
| return type('Callback', (), d)() | |||||
| d = {r: [(x.split(), None) for x in xs] for r, xs in RULES.items()} | |||||
| rules, callback = ParseTreeBuilder(T).create_tree_builder(d, None) | |||||
| self.parser = LALR().build_parser(rules, callback, 'start') | |||||
| self.simplify_tree = SimplifyTree() | |||||
| self.simplify_rule = SimplifyRule_Visitor() | |||||
| self.rule_tree_to_text = RuleTreeToText() | |||||
| def load_grammar(self, grammar_text): | def load_grammar(self, grammar_text): | ||||
| sd = SaveDefinitions() | |||||
| c = self._generate_parser_callbacks(sd) | |||||
| p = Parser(self.ga, c) | |||||
| p.parse( list(self.lexer.lex(grammar_text+"\n")) ) | |||||
| token_stream = list(self.lexer.lex(grammar_text+"\n")) | |||||
| tree = self.simplify_tree.transform( self.parser.parse(token_stream) ) | |||||
| # ================= | |||||
| # Process Tokens | |||||
| # ================= | |||||
| token_set = set() | |||||
| tokens = list(get_tokens(tree, token_set)) | |||||
| extract_anon = ExtractAnonTokens(tokens, token_set) | |||||
| tree = extract_anon.transform(tree) # Adds to tokens | |||||
| # Tokens | |||||
| token_ref = {} | token_ref = {} | ||||
| re_tokens = [] | re_tokens = [] | ||||
| str_tokens = [] | str_tokens = [] | ||||
| for name, token, flags in sd.tokens: | |||||
| for name, token, flags in tokens: | |||||
| value = token.value[1:-1] | value = token.value[1:-1] | ||||
| if '\u' in value: | if '\u' in value: | ||||
| # XXX for now, you can't mix unicode escaping and unicode characters at the same token | # XXX for now, you can't mix unicode escaping and unicode characters at the same token | ||||
| @@ -343,43 +316,70 @@ class GrammarLoader: | |||||
| re_tokens.sort(key=lambda x:len(x[1]), reverse=True) | re_tokens.sort(key=lambda x:len(x[1]), reverse=True) | ||||
| tokens = str_tokens + re_tokens # Order is important! | tokens = str_tokens + re_tokens # Order is important! | ||||
| # Rules | |||||
| # ================= | |||||
| # Process Rules | |||||
| # ================= | |||||
| ebnf_to_bnf = EBNF_to_BNF() | ebnf_to_bnf = EBNF_to_BNF() | ||||
| rules = {name: ebnf_to_bnf.transform(r) for name, r in sd.rules.items()} | |||||
| rules = {} | |||||
| for rule in tree.find_data('rule'): | |||||
| name, ebnf_tree = rule.children | |||||
| name = name.value | |||||
| if name in rules: | |||||
| raise ValueError("Rule '%s' defined more than once" % name) | |||||
| rules[name] = ebnf_to_bnf.transform(ebnf_tree) | |||||
| dict_update_safe(rules, ebnf_to_bnf.new_rules) | dict_update_safe(rules, ebnf_to_bnf.new_rules) | ||||
| for r in rules.values(): | for r in rules.values(): | ||||
| self.simplify_rule.visit(r) | self.simplify_rule.visit(r) | ||||
| rules = {origin: self.rule_tree_to_text.transform(tree) for origin, tree in rules.items()} | |||||
| # ==================== | |||||
| # Verify correctness | |||||
| # ==================== | |||||
| used_symbols = {symbol for expansions in rules.values() | |||||
| for expansion, _alias in expansions | |||||
| for symbol in expansion} | |||||
| rule_set = {r.lstrip('?') for r in rules} | |||||
| for sym in used_symbols: | |||||
| if is_terminal(sym): | |||||
| if sym not in token_set: | |||||
| raise GrammarError("Token '%s' used but not defined" % sym) | |||||
| else: | |||||
| if sym not in rule_set: | |||||
| raise GrammarError("Rule '%s' used but not defined" % sym) | |||||
| return tokens, rules | return tokens, rules | ||||
| load_grammar = GrammarLoader().load_grammar | load_grammar = GrammarLoader().load_grammar | ||||
| def test(): | def test(): | ||||
| g = """ | g = """ | ||||
| start: add | start: add | ||||
| # Rules | |||||
| // Rules | |||||
| add: mul | add: mul | ||||
| | add _add_sym mul | | add _add_sym mul | ||||
| mul: _atom | |||||
| | mul _add_mul _atom | |||||
| mul: [mul _add_mul] _atom | |||||
| neg: "-" _atom | |||||
| _atom: neg | |||||
| | number | |||||
| _atom: "-" _atom -> neg | |||||
| | NUMBER | |||||
| | "(" add ")" | | "(" add ")" | ||||
| # Tokens | |||||
| number: /[\d.]+/ | |||||
| // Tokens | |||||
| NUMBER: /[\d.]+/ | |||||
| _add_sym: "+" | "-" | _add_sym: "+" | "-" | ||||
| _add_mul: "*" | "/" | _add_mul: "*" | "/" | ||||
| WS.ignore: /\s+/ | |||||
| WS.ignore.newline: /\s+/ | |||||
| """ | """ | ||||
| g2 = """ | g2 = """ | ||||
| @@ -389,7 +389,9 @@ def test(): | |||||
| c: "c" | c: "c" | ||||
| d: "+" | "-" | d: "+" | "-" | ||||
| """ | """ | ||||
| load_grammar(g) | |||||
| # print load_grammar(g) | |||||
print GrammarLoader().load_grammar(g)
| if __name__ == '__main__': | |||||
| test() | |||||
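One consequence of the new "Verify correctness" pass is that a grammar referencing an undefined rule or token now fails at load time with a GrammarError rather than misbehaving later in the parser. A small sketch of the same scenario the new test_undefined_rule case checks:

from lark.lark import Lark
from lark.grammar_analysis import GrammarError   # also reachable via lark.common

try:
    Lark('start: a', parser='lalr')   # rule 'a' is used but never defined
except GrammarError as e:
    print(e)   # Rule 'a' used but not defined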
| @@ -0,0 +1,76 @@ | |||||
| from .grammar_analysis import is_terminal | |||||
| class Callback(object): | |||||
| pass | |||||
| def create_expand1_tree_builder_function(tree_builder): | |||||
| def f(children): | |||||
| if len(children) == 1: | |||||
| return children[0] | |||||
| else: | |||||
| return tree_builder(children) | |||||
| return f | |||||
| def create_rule_handler(expansion, usermethod): | |||||
| to_include = [(i, sym.startswith('_')) for i, sym in enumerate(expansion) | |||||
| if not (is_terminal(sym) and sym.startswith('_'))] | |||||
| def _build_ast(match): | |||||
| children = [] | |||||
| for i, to_expand in to_include: | |||||
| if to_expand: | |||||
| children += match[i].children | |||||
| else: | |||||
| children.append(match[i]) | |||||
| return usermethod(children) | |||||
| return _build_ast | |||||
| class ParseTreeBuilder: | |||||
| def __init__(self, tree_class): | |||||
| self.tree_class = tree_class | |||||
| def _create_tree_builder_function(self, name): | |||||
| tree_class = self.tree_class | |||||
| def f(children): | |||||
| return tree_class(name, children) | |||||
| return f | |||||
| def create_tree_builder(self, rules, transformer): | |||||
| callback = Callback() | |||||
| new_rules = [] | |||||
| for origin, expansions in rules.items(): | |||||
| expand1 = origin.startswith('?') | |||||
| _origin = origin.lstrip('?*') | |||||
| for expansion, alias in expansions: | |||||
| if alias and origin.startswith('_'): | |||||
| raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases" % origin) | |||||
| if alias: | |||||
| alias = alias.lstrip('*') | |||||
| _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion)) | |||||
| try: | |||||
| f = transformer._get_func(alias or _origin) | |||||
| except AttributeError: | |||||
| if alias: | |||||
| f = self._create_tree_builder_function(alias) | |||||
| else: | |||||
| f = self._create_tree_builder_function(_origin) | |||||
| if expand1: | |||||
| f = create_expand1_tree_builder_function(f) | |||||
| alias_handler = create_rule_handler(expansion, f) | |||||
| assert not hasattr(callback, _alias) | |||||
| setattr(callback, _alias, alias_handler) | |||||
| new_rules.append(( _origin, expansion, _alias )) | |||||
| return new_rules, callback | |||||
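To make create_rule_handler's filtering concrete: terminals whose names start with '_' are dropped from the match, while '_'-prefixed rules have their children spliced in rather than nested. A hypothetical, self-contained illustration (Node is only a stand-in for a parse-tree node, not part of the library):

from collections import namedtuple
from lark.parse_tree_builder import create_rule_handler

Node = namedtuple('Node', 'data children')

handler = create_rule_handler(['_LPAR', 'expansions', '_RPAR'], list)
matched = [Node('lpar', []), Node('expansions', ['a', 'b']), Node('rpar', [])]
# '_LPAR' and '_RPAR' are filtered out; only the 'expansions' node reaches the callback
assert handler(matched) == [Node('expansions', ['a', 'b'])]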
| @@ -34,7 +34,7 @@ class Parser(object): | |||||
| res = self.callbacks[rule]([x[0] for x in s]) | res = self.callbacks[rule]([x[0] for x in s]) | ||||
| if rule.origin == 'start': | |||||
| if rule.origin == self.ga.start_symbol and len(stack) == 1: | |||||
| return res | return res | ||||
| _action, new_state = get_action(rule.origin) | _action, new_state = get_action(rule.origin) | ||||
| @@ -0,0 +1,31 @@ | |||||
| from .grammar_analysis import GrammarAnalyzer | |||||
| from common import is_terminal | |||||
| from . import parser, earley | |||||
| class LALR: | |||||
| def build_parser(self, rules, callback, start): | |||||
| ga = GrammarAnalyzer(rules, start) | |||||
| ga.analyze() | |||||
| return parser.Parser(ga, callback) | |||||
| class Earley: | |||||
| @staticmethod | |||||
| def _process_expansion(x): | |||||
| return [{'literal': s} if is_terminal(s) else s for s in x] | |||||
| def build_parser(self, rules, callback, start): | |||||
| rules = [{'name':n, 'symbols': self._process_expansion(x), 'postprocess':getattr(callback, a)} for n,x,a in rules] | |||||
| return EarleyParser(earley.Parser(rules, start)) | |||||
| class EarleyParser: | |||||
| def __init__(self, parser): | |||||
| self.parser = parser | |||||
| def parse(self, text): | |||||
| res = self.parser.parse(text) | |||||
assert len(res) == 1, 'Ambiguous parse! Not handled yet'
| return res[0] | |||||
| ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||||
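ENGINE_DICT is what LarkOptions.parser indexes into, and 'earley' is the default, so a Lark built without parser= goes through the Earley frontend (and is subject to its no-ambiguity assertion). A sketch, reusing the grammar from test_basic2 below:

from lark.lark import Lark

g = Lark("""start: B A
            B: "12"
            A: "1" """)    # no parser= argument -> 'earley'
g.parse('121')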
| @@ -0,0 +1,14 @@ | |||||
| from __future__ import absolute_import, print_function | |||||
| import unittest | |||||
| import logging | |||||
| from .test_trees import TestTrees | |||||
| # from .test_selectors import TestSelectors | |||||
| from .test_parser import TestLalr | |||||
| # from .test_grammars import TestPythonG, TestConfigG | |||||
| logging.basicConfig(level=logging.INFO) | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||
| @@ -0,0 +1,326 @@ | |||||
| from __future__ import absolute_import | |||||
| import unittest | |||||
| import logging | |||||
| import os | |||||
| import sys | |||||
| try: | |||||
| from cStringIO import StringIO as cStringIO | |||||
| except ImportError: | |||||
# cStringIO exists only in Python 2.x; Python 3.x only has io.StringIO (imported below)
| cStringIO = None | |||||
| from io import ( | |||||
| StringIO as uStringIO, | |||||
| open, | |||||
| ) | |||||
| logging.basicConfig(level=logging.INFO) | |||||
| from lark.lark import Lark | |||||
| from lark.grammar_analysis import GrammarError | |||||
| from lark.parser import ParseError | |||||
| __path__ = os.path.dirname(__file__) | |||||
| def _read(n, *args): | |||||
| with open(os.path.join(__path__, n), *args) as f: | |||||
| return f.read() | |||||
| class TestLalr(unittest.TestCase): | |||||
| def test_basic1(self): | |||||
| g = Lark("""start: a+ b a* "b" a* | |||||
| b: "b" | |||||
| a: "a" | |||||
| """, parser='lalr') | |||||
| r = g.parse('aaabaab') | |||||
| self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' ) | |||||
| r = g.parse('aaabaaba') | |||||
| self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' ) | |||||
| self.assertRaises(ParseError, g.parse, 'aaabaa') | |||||
| def test_basic2(self): | |||||
| # Multiple parsers and colliding tokens | |||||
| g = Lark("""start: B A | |||||
| B: "12" | |||||
| A: "1" """) | |||||
| g2 = Lark("""start: B A | |||||
| B: "12" | |||||
| A: "2" """) | |||||
| x = g.parse('121') | |||||
| assert x.data == 'start' and x.children == ['12', '1'], x | |||||
| x = g2.parse('122') | |||||
| assert x.data == 'start' and x.children == ['12', '2'], x | |||||
| def test_basic3(self): | |||||
| "Tests that Earley and LALR parsers produce equal trees" | |||||
| g = Lark("""start: "(" name_list ("," "*" NAME)? ")" | |||||
| name_list: NAME | name_list "," NAME | |||||
| NAME: /\w+/ """, parser='lalr') | |||||
| l = g.parse('(a,b,c,*x)') | |||||
| g = Lark("""start: "(" name_list ("," "*" NAME)? ")" | |||||
| name_list: NAME | name_list "," NAME | |||||
| NAME: /\w+/ """) | |||||
| l2 = g.parse('(a,b,c,*x)') | |||||
| assert l == l2, '%s != %s' % (l.pretty(), l2.pretty()) | |||||
| @unittest.skipIf(cStringIO is None, "cStringIO not available") | |||||
| def test_stringio_bytes(self): | |||||
| """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object""" | |||||
| Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" ')) | |||||
| def test_stringio_unicode(self): | |||||
| """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object""" | |||||
| Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" ')) | |||||
| def test_unicode(self): | |||||
| g = Lark(u"""start: UNIA UNIB UNIA | |||||
| UNIA: /\xa3/ | |||||
| UNIB: /\u0101/ | |||||
| """) | |||||
| g.parse(u'\xa3\u0101\u00a3') | |||||
| def test_unicode2(self): | |||||
| g = Lark(r"""start: UNIA UNIB UNIA UNIC | |||||
| UNIA: /\xa3/ | |||||
| UNIB: "a\u0101b\ " | |||||
| UNIC: /a?\u0101c\n/ | |||||
| """) | |||||
| g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n') | |||||
| def test_recurse_expansion(self): | |||||
| """Verify that stack depth doesn't get exceeded on recursive rules marked for expansion.""" | |||||
| g = Lark(r"""start: a | start a | |||||
| a : "a" """) | |||||
# The input is sized relative to the recursion limit, so that recursing over the
# half-built tree structures (e.g. via repr()) would exceed the stack.
| g.parse("a" * (sys.getrecursionlimit() // 4)) | |||||
| def test_expand1_lists_with_one_item(self): | |||||
| g = Lark(r"""start: list | |||||
| ?list: item+ | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("a") | |||||
| # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item' | |||||
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',)) | |||||
| # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule | |||||
| self.assertEqual(len(r.children), 1) | |||||
| def test_expand1_lists_with_one_item_2(self): | |||||
| g = Lark(r"""start: list | |||||
| ?list: item+ "!" | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("a!") | |||||
| # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item' | |||||
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',)) | |||||
| # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule | |||||
| self.assertEqual(len(r.children), 1) | |||||
| def test_dont_expand1_lists_with_multiple_items(self): | |||||
| g = Lark(r"""start: list | |||||
| ?list: item+ | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("aa") | |||||
| # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded | |||||
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule | |||||
| self.assertEqual(len(r.children), 1) | |||||
| # Sanity check: verify that 'list' contains the two 'item's we've given it | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ('item', 'item')) | |||||
| def test_dont_expand1_lists_with_multiple_items_2(self): | |||||
| g = Lark(r"""start: list | |||||
| ?list: item+ "!" | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("aa!") | |||||
| # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded | |||||
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule | |||||
| self.assertEqual(len(r.children), 1) | |||||
| # Sanity check: verify that 'list' contains the two 'item's we've given it | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ('item', 'item')) | |||||
| def test_empty_expand1_list(self): | |||||
| g = Lark(r"""start: list | |||||
| ?list: item* | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("") | |||||
| # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded | |||||
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule | |||||
| self.assertEqual(len(r.children), 1) | |||||
| # Sanity check: verify that 'list' contains no 'item's as we've given it none | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ()) | |||||
| def test_empty_expand1_list_2(self): | |||||
| g = Lark(r"""start: list | |||||
| ?list: item* "!"? | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("") | |||||
| # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded | |||||
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule | |||||
| self.assertEqual(len(r.children), 1) | |||||
| # Sanity check: verify that 'list' contains no 'item's as we've given it none | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ()) | |||||
| def test_empty_flatten_list(self): | |||||
| g = Lark(r"""start: list | |||||
| list: | item "," list | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("") | |||||
# Because 'list' is a flatten rule, its top-level element should *never* be expanded
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # Sanity check: verify that 'list' contains no 'item's as we've given it none | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ()) | |||||
| @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)") | |||||
| def test_single_item_flatten_list(self): | |||||
| g = Lark(r"""start: list | |||||
| list: | item "," list | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("a,") | |||||
# Because 'list' is a flatten rule, its top-level element should *never* be expanded
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # Sanity check: verify that 'list' contains exactly the one 'item' we've given it | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ('item',)) | |||||
| @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)") | |||||
| def test_multiple_item_flatten_list(self): | |||||
| g = Lark(r"""start: list | |||||
| #list: | item "," list | |||||
| item : A | |||||
| A: "a" | |||||
| """) | |||||
| r = g.parse("a,a,") | |||||
# Because 'list' is a flatten rule, its top-level element should *never* be expanded
| self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',)) | |||||
| # Sanity check: verify that 'list' contains exactly the two 'item's we've given it | |||||
| [list] = r.children | |||||
| self.assertSequenceEqual([item.data for item in list.children], ('item', 'item')) | |||||
| @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)") | |||||
| def test_recurse_flatten(self): | |||||
| """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening.""" | |||||
| g = Lark(r"""start: a | start a | |||||
| a : A | |||||
| A : "a" """) | |||||
# The input is sized relative to the recursion limit, so that recursing over the
# half-built tree structures (e.g. via repr()) would exceed the stack.
| g.parse("a" * (sys.getrecursionlimit() // 4)) | |||||
| def test_token_collision(self): | |||||
| g = Lark("""start: "Hello" NAME | |||||
| NAME: /\w+/ | |||||
| WS.ignore: /\s+/ | |||||
| """, parser='lalr') | |||||
| x = g.parse('Hello World') | |||||
| self.assertSequenceEqual(x.children, ['World']) | |||||
| def test_undefined_rule(self): | |||||
| self.assertRaises(GrammarError, Lark, """start: a""", parser='lalr') | |||||
| def test_undefined_token(self): | |||||
| self.assertRaises(GrammarError, Lark, """start: A""", parser='lalr') | |||||
| def test_rule_collision(self): | |||||
| g = Lark("""start: "a"+ "b" | |||||
| | "a"+ """, parser='lalr') | |||||
| x = g.parse('aaaa') | |||||
| x = g.parse('aaaab') | |||||
| def test_rule_collision2(self): | |||||
| g = Lark("""start: "a"* "b" | |||||
| | "a"+ """, parser='lalr') | |||||
| x = g.parse('aaaa') | |||||
| x = g.parse('aaaab') | |||||
| x = g.parse('b') | |||||
| def test_regex_embed(self): | |||||
| g = Lark("""start: A B C | |||||
| A: /a/ | |||||
| B: /${A}b/ | |||||
| C: /${B}c/ | |||||
| """, parser='lalr') | |||||
| x = g.parse('aababc') | |||||
| def test_token_not_anon(self): | |||||
| """Tests that "a" is matched as A, rather than an anonymous token. | |||||
| That means that "a" is not filtered out, despite being an 'immediate string'. | |||||
| Whether or not this is the intuitive behavior, I'm not sure yet. | |||||
| -Erez | |||||
| """ | |||||
| g = Lark("""start: "a" | |||||
| A: "a" """, parser='lalr') | |||||
| x = g.parse('a') | |||||
| self.assertEqual(len(x.children), 1, '"a" should not be considered anonymous') | |||||
| self.assertEqual(x.children[0].type, "A") | |||||
| def test_maybe(self): | |||||
| g = Lark("""start: ["a"] """, parser='lalr') | |||||
| x = g.parse('a') | |||||
| x = g.parse('') | |||||
| def test_start(self): | |||||
| g = Lark("""a: "a" a? """, parser='lalr', start='a') | |||||
| x = g.parse('a') | |||||
| x = g.parse('aa') | |||||
| x = g.parse('aaa') | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||
| @@ -0,0 +1,26 @@ | |||||
| from __future__ import absolute_import | |||||
| from unittest import TestCase | |||||
| import logging | |||||
| import copy | |||||
| import pickle | |||||
| from lark.tree import Tree | |||||
| class TestTrees(TestCase): | |||||
| def setUp(self): | |||||
| self.tree1 = Tree('a', [Tree(x, y) for x, y in zip('bcd', 'xyz')]) | |||||
| def test_deepcopy(self): | |||||
| assert self.tree1 == copy.deepcopy(self.tree1) | |||||
| def test_pickle(self): | |||||
| s = copy.deepcopy(self.tree1) | |||||
| data = pickle.dumps(s) | |||||
| assert pickle.loads(data) == s | |||||
| if __name__ == '__main__': | |||||
| unittest.main() | |||||
| @@ -33,6 +33,19 @@ class Tree(object): | |||||
| def __eq__(self, other): | def __eq__(self, other): | ||||
| return self.data == other.data and self.children == other.children | return self.data == other.data and self.children == other.children | ||||
| def find_pred(self, pred): | |||||
| if pred(self): | |||||
| yield self | |||||
| else: | |||||
| for i, c in enumerate(self.children): | |||||
| if isinstance(c, Tree): | |||||
| for t in c.find_pred(pred): | |||||
| yield t | |||||
| def find_data(self, data): | |||||
| return self.find_pred(lambda t: t.data == data) | |||||
| # def find_path(self, pred): | # def find_path(self, pred): | ||||
| # if pred(self): | # if pred(self): | ||||
| # yield [] | # yield [] | ||||
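A short usage sketch for the new helpers (the toy tree is made up): find_pred yields every subtree matching a predicate, without descending into a subtree that already matched, and find_data filters on the data field — this is what load_grammar now uses to pull out the 'rule' and 'token' definitions.

from lark.tree import Tree

t = Tree('start', [Tree('rule', ['a']), Tree('token', ['B']), Tree('rule', ['c'])])
assert [r.children for r in t.find_data('rule')] == [['a'], ['c']]
assert [tok.children for tok in t.find_data('token')] == [['B']]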