| @@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper | |||||
| The Lark class accepts a grammar string or file object, and keyword options: | The Lark class accepts a grammar string or file object, and keyword options: | ||||
| * start - The symbol in the grammar that begins the parse (Default: `"start"`) | |||||
| * **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`) | |||||
| * parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`) | |||||
| * **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`) | |||||
| * lexer - Overrides default lexer. | |||||
| * **lexer** - Overrides default lexer, depending on parser. | |||||
| * transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr") | |||||
| * **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr") | |||||
| * postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual") | |||||
| * **postlex** - Lexer post-processing (Default: `None`. Only works when lexer is "standard" or "contextual") | |||||
| * ambiguity (only relevant for earley and cyk) | |||||
| * **ambiguity** (only relevant for earley and cyk) | |||||
| * "explicit" - Return all derivations inside an "_ambig" data node. | * "explicit" - Return all derivations inside an "_ambig" data node. | ||||
| * "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules. Default) | * "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules. Default) | ||||
| * debug - Display warnings (such as Shift-Reduce warnings for LALR) | |||||
| * **debug** - Display warnings (such as Shift-Reduce warnings for LALR) | |||||
| * keep_all_tokens - Don't throw away any terminals from the tree (Default=False) | |||||
| * **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`) | |||||
| * propagate_positions - Propagate line/column count to tree nodes (default=False) | |||||
| * **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`) | |||||
| * lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. | |||||
| * **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`) | |||||
| * **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. | |||||
| #### parse(self, text) | #### parse(self, text) | ||||
| @@ -50,7 +52,7 @@ The main tree class | |||||
| * `data` - The name of the rule or alias | * `data` - The name of the rule or alias | ||||
| * `children` - List of matched sub-rules and terminals | * `children` - List of matched sub-rules and terminals | ||||
| * `meta` - Line & Column numbers, if using `propagate_positions` | |||||
| * `meta` - Line & Column numbers (unless `propagate_positions` is disabled) | |||||
| #### \_\_init\_\_(self, data, children) | #### \_\_init\_\_(self, data, children) | ||||
| @@ -147,7 +147,7 @@ Each item is one of: | |||||
| * `TERMINAL` | * `TERMINAL` | ||||
| * `"string literal"` or `/regexp literal/` | * `"string literal"` or `/regexp literal/` | ||||
| * `(item item ..)` - Group items | * `(item item ..)` - Group items | ||||
| * `[item item ..]` - Maybe. Same as `(item item ..)?` | |||||
| * `[item item ..]` - Maybe. Same as `(item item ..)?`, but generates `None` if there is no match (unless the `maybe_placeholders` option is set to `False`) | |||||
| * `item?` - Zero or one instances of item ("maybe") | * `item?` - Zero or one instances of item ("maybe") | ||||
| * `item*` - Zero or more instances of item | * `item*` - Zero or more instances of item | ||||
| * `item+` - One or more instances of item | * `item+` - One or more instances of item | ||||
| @@ -157,7 +157,7 @@ Each item is one of: | |||||
| **Examples:** | **Examples:** | ||||
| ```perl | ```perl | ||||
| hello_world: "hello" "world" | hello_world: "hello" "world" | ||||
| mul: [mul "*"] number //# Left-recursion is allowed! | |||||
| mul: (mul "*")? number //# Left-recursion is allowed and encouraged! | |||||
| expr: expr operator expr | expr: expr operator expr | ||||
| | value //# Multi-line, belongs to expr | | value //# Multi-line, belongs to expr | ||||
| @@ -29,7 +29,7 @@ parser = Lark(""" | |||||
| data_item: STR INT* | data_item: STR INT* | ||||
| %declare STR INT | %declare STR INT | ||||
| """, parser='lalr', lexer=TypeLexer) | |||||
| """, parser='lalr', lexer=TypeLexer, propagate_positions=False) | |||||
| class ParseToDict(Transformer): | class ParseToDict(Transformer): | ||||
| @@ -25,15 +25,9 @@ test_json = ''' | |||||
| def test_earley(): | def test_earley(): | ||||
| json_parser = Lark(json_grammar) | |||||
| json_parser = Lark(json_grammar, maybe_placeholders=False) | |||||
| tree = json_parser.parse(test_json) | tree = json_parser.parse(test_json) | ||||
| # print ('@@', tree.pretty()) | |||||
| # for x in tree.find_data('true'): | |||||
| # x.data = 'false' | |||||
| # # x.children[0].value = '"HAHA"' | |||||
| new_json = Reconstructor(json_parser).reconstruct(tree) | new_json = Reconstructor(json_parser).reconstruct(tree) | ||||
| print (new_json) | print (new_json) | ||||
| print (json.loads(new_json) == json.loads(test_json)) | print (json.loads(new_json) == json.loads(test_json)) | ||||
| @@ -41,7 +35,7 @@ def test_earley(): | |||||
| def test_lalr(): | def test_lalr(): | ||||
| json_parser = Lark(json_grammar, parser='lalr') | |||||
| json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) | |||||
| tree = json_parser.parse(test_json) | tree = json_parser.parse(test_json) | ||||
| new_json = Reconstructor(json_parser).reconstruct(tree) | new_json = Reconstructor(json_parser).reconstruct(tree) | ||||
| @@ -66,9 +66,9 @@ class LarkOptions(Serialize): | |||||
| 'profile': False, | 'profile': False, | ||||
| 'priority': 'auto', | 'priority': 'auto', | ||||
| 'ambiguity': 'auto', | 'ambiguity': 'auto', | ||||
| 'propagate_positions': False, | |||||
| 'propagate_positions': True, | |||||
| 'lexer_callbacks': {}, | 'lexer_callbacks': {}, | ||||
| 'maybe_placeholders': False, | |||||
| 'maybe_placeholders': True, | |||||
| 'edit_terminals': None, | 'edit_terminals': None, | ||||
| } | } | ||||
| @@ -69,6 +69,7 @@ class MakeMatchTree: | |||||
| class Reconstructor: | class Reconstructor: | ||||
| def __init__(self, parser): | def __init__(self, parser): | ||||
| # XXX TODO calling compile twice returns different results! | # XXX TODO calling compile twice returns different results! | ||||
| assert parser.options.maybe_placeholders == False | |||||
| tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) | tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) | ||||
| self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) | self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) | ||||
| @@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| @unittest.skipIf(PARSER == 'cyk', "No empty rules") | @unittest.skipIf(PARSER == 'cyk', "No empty rules") | ||||
| def test_twice_empty(self): | def test_twice_empty(self): | ||||
| g = """!start: [["A"]] | |||||
| g = """!start: ("A"?)? | |||||
| """ | """ | ||||
| l = _Lark(g) | l = _Lark(g) | ||||
| tree = l.parse('A') | tree = l.parse('A') | ||||
| @@ -16,7 +16,7 @@ def _remove_ws(s): | |||||
| class TestReconstructor(TestCase): | class TestReconstructor(TestCase): | ||||
| def assert_reconstruct(self, grammar, code): | def assert_reconstruct(self, grammar, code): | ||||
| parser = Lark(grammar, parser='lalr') | |||||
| parser = Lark(grammar, parser='lalr', maybe_placeholders=False) | |||||
| tree = parser.parse(code) | tree = parser.parse(code) | ||||
| new = Reconstructor(parser).reconstruct(tree) | new = Reconstructor(parser).reconstruct(tree) | ||||
| self.assertEqual(_remove_ws(code), _remove_ws(new)) | self.assertEqual(_remove_ws(code), _remove_ws(new)) | ||||
| @@ -105,7 +105,7 @@ class TestReconstructor(TestCase): | |||||
| %ignore WS | %ignore WS | ||||
| """ | """ | ||||
| json_parser = Lark(json_grammar, parser='lalr') | |||||
| json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) | |||||
| tree = json_parser.parse(test_json) | tree = json_parser.parse(test_json) | ||||
| new_json = Reconstructor(json_parser).reconstruct(tree) | new_json = Reconstructor(json_parser).reconstruct(tree) | ||||