| @@ -1,3 +1,7 @@ | |||||
| # | |||||
| # This example shows how to write a basic calculator with variables. | |||||
| # | |||||
| from lark import Lark, InlineTransformer | from lark import Lark, InlineTransformer | ||||
| calc_grammar = """ | calc_grammar = """ | ||||
| @@ -0,0 +1,42 @@ | |||||
| # | |||||
| # This example demonstrates lex-less parsing using the earley_nolex frontend | |||||
| # | |||||
| # Using a lexer for configuration files is tricky, because values don't | |||||
| # have to be surrounded by delimiters. | |||||
| # In this example we skip lexing and let the Earley parser resolve the ambiguity. | |||||
| # | |||||
| # Future versions of lark will make it easier to write these kinds of grammars. | |||||
| # | |||||
| from lark import Lark, Transformer | |||||
| parser = Lark(r""" | |||||
| start: _nl? section+ | |||||
| section: "[" name "]" _nl item+ | |||||
| item: name "=" value _nl | |||||
| name: /[a-zA-Z_]/ /\w/* | |||||
| value: /./+ | |||||
| _nl: (_CR? _LF)+ | |||||
| _CR : /\r/ | |||||
| _LF : /\n/ | |||||
| """, parser="earley_nolex") | |||||
| class RestoreTokens(Transformer): | |||||
| value = ''.join | |||||
| name = ''.join | |||||
| def test(): | |||||
| sample_conf = """ | |||||
| [bla] | |||||
| a=Hello | |||||
| this="that",4 | |||||
| """ | |||||
| r = parser.parse(sample_conf) | |||||
| print(RestoreTokens().transform(r).pretty()) | |||||
| if __name__ == '__main__': | |||||
| test() | |||||
| @@ -1,8 +1,12 @@ | |||||
| """This example demonstrates usage of the Indenter class. | |||||
| Since indentation is context-sensitive, a postlex stage is introduced to manufacture INDENT/DEDENT tokens. | |||||
| It is crucial for the indenter that the NL_type matches the spaces (and tabs) after the newline. | |||||
| """ | |||||
| # | |||||
| # This example demonstrates usage of the Indenter class. | |||||
| # | |||||
| # Since indentation is context-sensitive, a postlex stage is introduced to | |||||
| # manufacture INDENT/DEDENT tokens. | |||||
| # | |||||
| # It is crucial for the indenter that the NL_type matches | |||||
| # the spaces (and tabs) after the newline. | |||||
| # | |||||
| from lark.lark import Lark | from lark.lark import Lark | ||||
| from lark.indenter import Indenter | from lark.indenter import Indenter | ||||
| @@ -1,3 +1,9 @@ | |||||
| # | |||||
| # This example shows how to write a basic JSON parser | |||||
| # | |||||
| # The code is short and clear, but has good performance. | |||||
| # | |||||
| import sys | import sys | ||||
| from lark import Lark, inline_args, Transformer | from lark import Lark, inline_args, Transformer | ||||
| @@ -1,9 +1,10 @@ | |||||
| import re | import re | ||||
| import sre_parse | |||||
| from .lexer import Lexer | from .lexer import Lexer | ||||
| from .parsers.lalr_analysis import GrammarAnalyzer | from .parsers.lalr_analysis import GrammarAnalyzer | ||||
| from .common import is_terminal | |||||
| from .common import is_terminal, GrammarError | |||||
| from .parsers import lalr_parser, earley | from .parsers import lalr_parser, earley | ||||
| class WithLexer: | class WithLexer: | ||||
| @@ -54,7 +55,7 @@ class Earley(WithLexer): | |||||
| assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | ||||
| return res[0] | return res[0] | ||||
| class Earley2: | |||||
| class Earley_NoLex: | |||||
| def __init__(self, lexer_conf, parser_conf): | def __init__(self, lexer_conf, parser_conf): | ||||
| self.token_by_name = {t.name:t for t in lexer_conf.tokens} | self.token_by_name = {t.name:t for t in lexer_conf.tokens} | ||||
| @@ -68,7 +69,11 @@ class Earley2: | |||||
| def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
| for sym in expansion: | for sym in expansion: | ||||
| if is_terminal(sym): | if is_terminal(sym): | ||||
| yield sym, re.compile(self.token_by_name[sym].to_regexp()) | |||||
| regexp = self.token_by_name[sym].to_regexp() | |||||
| width = sre_parse.parse(regexp).getwidth() | |||||
| if not width == (1,1): | |||||
| raise GrammarError('Dynamic lexing requires all tokens have the width 1 (%s is %s)' % (regexp, width)) | |||||
| yield sym, re.compile(regexp) | |||||
| else: | else: | ||||
| yield sym | yield sym | ||||
| @@ -77,4 +82,4 @@ class Earley2: | |||||
| assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | ||||
| return res[0] | return res[0] | ||||
| ENGINE_DICT = { 'lalr': LALR, 'earley': Earley } | |||||
| ENGINE_DICT = { 'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex } | |||||
| @@ -43,9 +43,11 @@ class State(object): | |||||
| # PORT: originally tests regexp | # PORT: originally tests regexp | ||||
| if self.expect_symbol[1] is not None: | if self.expect_symbol[1] is not None: | ||||
| match = self.expect_symbol[1].match(stream, pos) | |||||
| match = self.expect_symbol[1].match(inp) | |||||
| if match: | |||||
| return self.next_state(inp) | |||||
| if self.expect_symbol[0] == inp.type: | |||||
| elif self.expect_symbol[0] == inp.type: | |||||
| return self.next_state(inp) | return self.next_state(inp) | ||||
| def consume_nonterminal(self, inp): | def consume_nonterminal(self, inp): | ||||