Added the earley_nolex frontend, and a conf_nolex example to use it

9 years ago · da15f99edb
--- a/examples/calc.py
+++ b/examples/calc.py
@@ -1,3 +1,7 @@
 #
 # This example shows how to write a basic calculator with variables.
 #
 from lark import Lark, InlineTransformer
 calc_grammar = """
--- a/examples/conf_nolex.py
+++ b/examples/conf_nolex.py
@@ -0,0 +1,42 @@
 #
 # This example demonstrates lex-less parsing using the earley_nolex frontend
 #
 # Using a lexer for configuration files is tricky, because values don't
 # have to be surrounded by delimiters.
 # In this example we skip lexing and let the Earley parser resolve the ambiguity.
 #
 # Future versions of lark will make it easier to write these kinds of grammars.
 #
 from lark import Lark, Transformer
 # Grammar for a small INI-style configuration format: an optional leading
 # newline run, then one or more "[name]" sections, each followed by one or
 # more "name=value" items terminated by a newline run.
 # Every regexp used here (/[a-zA-Z_]/, /\w/, /./, /\r/, /\n/) matches exactly
 # one character; longer names and values are built by repeating them.
 # The parser is constructed with the "earley_nolex" frontend.
 parser = Lark(r"""
        start: _nl? section+
        section: "[" name "]" _nl item+
        item: name "=" value _nl
        name: /[a-zA-Z_]/ /\w/*
        value: /./+
        _nl: (_CR? _LF)+
        _CR : /\r/
        _LF : /\n/
    """, parser="earley_nolex")
 class RestoreTokens(Transformer):
    # The attribute names mirror the `value` and `name` rules of the grammar.
    # Each is bound to ''.join, i.e. str.join with an empty separator, which
    # concatenates an iterable of strings into one string.
    # NOTE(review): presumably Transformer calls the attribute named after a
    # rule with that rule's list of matched single-character children, so the
    # characters are glued back into a whole string -- confirm against Transformer.
    value = ''.join
    name = ''.join
 def test():
    """Parse a small sample configuration and print the transformed tree."""
    # Sample input: one section header followed by two name=value items.
    # The second value contains quotes and a comma, with no delimiters around it.
    sample_conf = """
 [bla]
 a=Hello
 this="that",4
 """
    r = parser.parse(sample_conf)
    # Run the parse result through RestoreTokens, then print its pretty() form.
    print(RestoreTokens().transform(r).pretty())
 # Run the demo only when this file is executed as a script, not when imported.
 if __name__ == '__main__':
    test()
--- a/examples/indented_tree.py
+++ b/examples/indented_tree.py
@@ -1,8 +1,12 @@
 """This example demonstrates usage of the Indenter class.
 Since indentation is context-sensitive, a postlex stage is introduced to manufacture INDENT/DEDENT tokens.
 It is crucial for the indenter that the NL_type matches the spaces (and tabs) after the newline.
 """
 #
 # This example demonstrates usage of the Indenter class.
 #
 # Since indentation is context-sensitive, a postlex stage is introduced to
 # manufacture INDENT/DEDENT tokens.
 #
 # It is crucial for the indenter that the NL_type matches
 # the spaces (and tabs) after the newline.
 #
 from lark.lark import Lark
 from lark.indenter import Indenter
--- a/examples/json_parser.py
+++ b/examples/json_parser.py
@@ -1,3 +1,9 @@
 #
 # This example shows how to write a basic JSON parser
 #
 # The code is short and clear, but has good performance.
 #
 import sys
 from lark import Lark, inline_args, Transformer
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -1,9 +1,10 @@
 import re
 import sre_parse
 from .lexer import Lexer
 from .parsers.lalr_analysis import GrammarAnalyzer
 from .common import is_terminal
 from .common import is_terminal, GrammarError
 from .parsers import lalr_parser, earley
 class WithLexer:
@@ -54,7 +55,7 @@ class Earley(WithLexer):
        assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
        return res[0]
 class Earley2:
 class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.token_by_name = {t.name:t for t in lexer_conf.tokens}
@@ -68,7 +69,11 @@ class Earley2:
    def _prepare_expansion(self, expansion):
        for sym in expansion:
            if is_terminal(sym):
                yield sym, re.compile(self.token_by_name[sym].to_regexp())
                regexp = self.token_by_name[sym].to_regexp()
                width = sre_parse.parse(regexp).getwidth()
                if not width == (1,1):
                    raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width))
                yield sym, re.compile(regexp)
            else:
                yield sym
@@ -77,4 +82,4 @@ class Earley2:
        assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
        return res[0]
 ENGINE_DICT = { 'lalr': LALR, 'earley': Earley }
 ENGINE_DICT = { 'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex }
--- a/lark/parsers/earley.py
+++ b/lark/parsers/earley.py
@@ -43,9 +43,11 @@ class State(object):
            # PORT: originally tests regexp
            if self.expect_symbol[1] is not None:
                match = self.expect_symbol[1].match(stream, pos)
                match = self.expect_symbol[1].match(inp)
                if match:
                    return self.next_state(inp)
            if self.expect_symbol[0] == inp.type:
            elif self.expect_symbol[0] == inp.type:
                return self.next_state(inp)
    def consume_nonterminal(self, inp):