@@ -19,13 +19,12 @@ parser = Lark(r"""
         start: _NL? section+
         section: "[" NAME "]" _NL item+
         item: NAME "=" VALUE _NL
-        NAME: /[a-zA-Z_]\w*/
-        VALUE: /.*/
+        VALUE: /./*
+        %import common.CNAME -> NAME
+        %import common.NEWLINE -> _NL
-        _NL: /(\r?\n)+/
-        %ignore /[\t \f]+/
-        %ignore /\#[^\n]*/
+        %import common.WS_INLINE
+        %ignore WS_INLINE
     """, parser="lalr", lexer="contextual")
@@ -12,25 +12,21 @@
 # See examples/conf.py for an example of that approach.
 #
-from lark import Lark, Transformer
+from lark import Lark

 parser = Lark(r"""
-        start: _nl? section+
-        section: "[" name "]" _nl item+
-        item: name "=" value _nl
-        name: /[a-zA-Z_]/ /\w/*
-        value: /./+
-        _nl: (_CR? _LF)+
-        _CR : /\r/
-        _LF : /\n/
+        start: _NL? section+
+        section: "[" NAME "]" _NL item+
+        item: NAME "=" VALUE _NL
+        VALUE: /./*
+        %import common.CNAME -> NAME
+        %import common.NEWLINE -> _NL
+        %import common.WS_INLINE
+        %ignore WS_INLINE
     """, lexer=None)

-class RestoreTokens(Transformer):
-    value = ''.join
-    name = ''.join

 def test():
     sample_conf = """
 [bla]
@@ -40,7 +36,7 @@ this="that",4
 """
     r = parser.parse(sample_conf)
-    print(RestoreTokens().transform(r).pretty())
+    print r.pretty()

 if __name__ == '__main__':
     test()
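Since the Earley_NoLex frontend now joins converted token values back into plain strings before returning the tree (see the parser_frontends change further down), the example can print the tree directly instead of defining its own Transformer. A small usage sketch for walking that tree, assuming the parser and sample_conf defined above, and assuming anonymous punctuation tokens are filtered out of the tree as usual in lark:

    r = parser.parse(sample_conf)
    for section in r.children:           # one subtree per section rule
        name = section.children[0]       # "[" NAME "]" _NL item+
        items = section.children[1:]
        print('section', name)
        for item in items:
            key, value = item.children   # NAME "=" VALUE (the "=" is filtered from the tree)
            print('   ', key, '=', value)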
@@ -39,3 +39,7 @@ CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
 WS_INLINE: (" "|/\t/)+
 WS: /[ \t\f\r\n]/+

+CR : /\r/
+LF : /\n/
+NEWLINE: (CR? LF)+
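With CR, LF and NEWLINE now defined in the common grammar, user grammars can import the newline terminal instead of spelling out the regexp themselves, as the conf examples above do. A minimal sketch of a grammar that leans on it (assuming the %import machinery works as in those examples):

    from lark import Lark

    lines = Lark(r"""
        start: (WORD _NL)+
        WORD: /\w+/
        %import common.NEWLINE -> _NL
    """, parser="lalr")

    print(lines.parse("hello\nworld\n").pretty())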
@@ -119,21 +119,23 @@ class Lark:
         assert not self.options.profile, "Feature temporarily disabled"
         self.profiler = Profiler() if self.options.profile else None

+        lexer = self.options.lexer
+        if lexer == 'auto':
+            if self.options.parser == 'lalr':
+                lexer = 'standard'
+            elif self.options.parser == 'earley':
+                lexer = 'standard'
+            self.options.lexer = lexer

         self.grammar = load_grammar(grammar)
-        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=True)
+        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer))
         self.ignore_tokens = self.grammar.extra['ignore']

         self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex)

-        if self.options.lexer == 'auto':
-            if self.options.parser == 'lalr':
-                self.options.lexer = 'standard'
-            elif self.options.parser == 'earley':
-                self.options.lexer = 'standard'

         if self.options.parser:
             self.parser = self._build_parser()
-        elif self.options.lexer:
+        elif lexer:
             self.lexer = self._build_lexer()

         if self.profiler: self.profiler.enter_section('outside_lark')
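The lexer choice is now resolved before the grammar is compiled, so Grammar.compile can be told whether a lexer will exist at all (lexer=bool(lexer)); passing lexer=None selects the new scanless path. A construction sketch, using a grammar shaped like the new test at the end of this patch:

    from lark import Lark

    # 'auto' (the default) still resolves to a standard lexer for lalr and earley;
    # lexer=None is the new scanless path, where terminals are compiled into rules.
    grammar = r"""start: A "b"
                  A: "a"+
                  """
    g = Lark(grammar, parser="earley", lexer=None)
    print(g.parse("aaab").pretty())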
@@ -239,6 +239,15 @@ class ExtractAnonTokens(InlineTransformer):
         self.re_reverse = {td.pattern.value: td.name for td in tokens if isinstance(td.pattern, PatternRE)}
         self.i = 0

+    def range(self, start, end):
+        assert start.type == end.type == 'STRING'
+        start = start.value[1:-1]
+        end = end.value[1:-1]
+        assert len(start) == len(end) == 1
+        regexp = '/[%s-%s]/' % (start, end)
+        t = Token('REGEXP', regexp)
+        return self.tokenvalue(t)
+
     def tokenvalue(self, token):
         value = token.value[1:-1]
         if token.type == 'STRING':
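The new range handler rewrites a quoted character range in the grammar (e.g. "a".."z", by my reading) into a character-class regexp and then feeds it through the existing tokenvalue path. A standalone sketch of just the string manipulation it performs:

    # Endpoints arrive as quoted, single-character STRING tokens.
    start, end = '"a"', '"z"'
    regexp = '/[%s-%s]/' % (start[1:-1], end[1:-1])
    assert regexp == '/[a-z]/'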
@@ -325,8 +334,19 @@ class Grammar:
         self.extra = extra

     def compile(self, lexer=False):
-        assert lexer
+        # assert lexer
+        if not lexer:
+            self.rule_defs += self.token_defs
+            self.token_defs = []
+            for name, tree in self.rule_defs:
+                for tokenvalue in tree.find_data('tokenvalue'):
+                    value ,= tokenvalue.children
+                    if value.type == 'STRING':
+                        assert value[0] == value[-1] == '"'
+                        if len(value)>3:
+                            tokenvalue.data = 'expansion'
+                            tokenvalue.children = [T('tokenvalue', [Token('STRING', '"%s"'%ch)]) for ch in value[1:-1]]

         tokendefs = list(self.token_defs)
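When compiling without a lexer, token definitions are folded into the rule set and any multi-character string literal is split into an expansion of one-character strings, so the scannerless Earley parser can match the input character by character. A standalone sketch of just the literal-splitting step (plain Python, not the real Tree/Token classes):

    def split_literal(value):
        # value still carries its surrounding quotes, e.g. '"abc"'
        assert value[0] == value[-1] == '"'
        if len(value) > 3:                      # more than one character inside the quotes
            return ['"%s"' % ch for ch in value[1:-1]]
        return [value]

    assert split_literal('"abc"') == ['"a"', '"b"', '"c"']
    assert split_literal('"x"') == ['"x"']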
# =================
@@ -6,6 +6,7 @@ from .lexer import Lexer, ContextualLexer, Token
 from .common import is_terminal, GrammarError, ParserConf
 from .parsers import lalr_parser, earley, nearley
 from .parsers.grammar_analysis import Rule
+from .tree import Transformer

 class WithLexer:
     def __init__(self, lexer_conf):
@@ -121,10 +122,16 @@ class Nearley_NoLex:
 class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf):
+        self.tokens_to_convert = {name: '__token_'+name for name, tree, _ in parser_conf.rules if is_terminal(name)}
+        rules = []
+        for name, exp, alias in parser_conf.rules:
+            name = self.tokens_to_convert.get(name, name)
+            exp = [self.tokens_to_convert.get(x, x) for x in exp]
+            rules.append((name, exp, alias))
+
         self.token_by_name = {t.name:t for t in lexer_conf.tokens}

-        rules = [(n, list(self._prepare_expansion(x)), a)
-                 for n,x,a in parser_conf.rules]
+        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in rules]

         self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
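Terminal names that Grammar.compile folded into the rule set are renamed here with a __token_ prefix, both as rule names and wherever they appear in an expansion, so they can be recognized and undone after parsing. A standalone sketch of the renaming for a small rule list (illustrative names, not the real ParserConf data):

    rules = [('start', ['item'], None),
             ('item', ['NAME', 'VALUE'], None),
             ('NAME', ['LETTER'], None),      # former terminal, now a rule
             ('VALUE', ['CHAR'], None)]       # former terminal, now a rule

    def is_term(name):
        return name.isupper()

    tokens_to_convert = {name: '__token_' + name for name, _x, _a in rules if is_term(name)}
    renamed = [(tokens_to_convert.get(n, n),
                [tokens_to_convert.get(x, x) for x in exp],
                a)
               for n, exp, a in rules]
    # 'NAME' and 'VALUE' become '__token_NAME' / '__token_VALUE' wherever they occur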
@@ -142,7 +149,16 @@ class Earley_NoLex:
     def parse(self, text):
         res = self.parser.parse(text)
         assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
-        return res[0]
+        res = res[0]
+
+        class RestoreTokens(Transformer):
+            pass
+
+        for t in self.tokens_to_convert:
+            setattr(RestoreTokens, t, ''.join)
+
+        res = RestoreTokens().transform(res)
+        return res

 def get_frontend(parser, lexer):
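After parsing, a Transformer subclass is assembled on the fly with one ''.join callback per converted terminal, so the per-character matches collapse back into ordinary strings before the tree reaches the caller. A standalone sketch of that mechanism; the module path and the old-style Transformer callbacks (which receive the children list directly, as ''.join implies) are assumptions based on this version of the code:

    from lark.tree import Tree, Transformer

    tokens_to_convert = {'NAME': '__token_NAME', 'VALUE': '__token_VALUE'}

    class RestoreTokens(Transformer):
        pass

    for t in tokens_to_convert:
        setattr(RestoreTokens, t, ''.join)

    tree = Tree('item', [Tree('NAME', list('host')), Tree('VALUE', list('localhost'))])
    print(RestoreTokens().transform(tree))   # children joined back into 'host' / 'localhost'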
@@ -39,9 +39,19 @@ class TestParsers(unittest.TestCase):
         l2 = g.parse('(a,b,c,*x)')
         assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

+    def test_earley_nolex(self):
+        g = Lark("""start: A "b" c
+                    A: "a"+
+                    c: "abc"
+                    """, parser="earley", lexer=None)
+        x = g.parse('aaaababc')
+
+class TestEarley(unittest.TestCase):
+    pass

 def _make_parser_test(LEXER, PARSER):
     def _Lark(grammar, **kwargs):
         return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)