diff --git a/docs/grammar.md b/docs/grammar.md index ad70f6e..9343ee4 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -45,6 +45,12 @@ Literals can be one of: * `/re with flags/imulx` * Literal range: `"a".."z"`, `"1".."9"`, etc. +### Priority + +Terminals can be assigned priority only when using a lexer (future versions may support Earley's dynamic lexing). + +Priority can be either positive or negative. If not specified for a terminal, it's assumed to be 1 (i.e. the default). + #### Notes for when using a lexer: When using a lexer (standard or contextual), it is the grammar-author's responsibility to make sure the literals don't collide, or that if they do, they are matched in the desired order. Literals are matched in an order according to the following criteria: @@ -90,7 +96,7 @@ Each item is one of: * `item*` - Zero or more instances of item * `item+` - One or more instances of item * `item ~ n` - Exactly *n* instances of item -* `item ~ n..m` - Between *n* to *m* instances of item +* `item ~ n..m` - Between *n* and *m* instances of item (not recommended for wide ranges, due to performance issues) **Examples:** ```perl @@ -102,6 +108,11 @@ expr: expr operator expr four_words: word ~ 4 ``` +### Priority + +Rules can be assigned priority only when using Earley (future versions may support LALR as well). + +Priority can be either positive or negative. If not specified for a rule, it's assumed to be 1 (i.e. the default). ## Directives diff --git a/docs/parsers.md b/docs/parsers.md index 35de223..fb7c997 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -7,7 +7,7 @@ An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser Lark's Earley implementation runs on top of a skipping chart parser, which allows it to use regular expressions, instead of matching characters one-by-one. This is a huge improvement to Earley that is unique to Lark. This feature is used by default, but can also be requested explicitely using `lexer='dynamic'`. 
-It's possible to bypass the dynamic lexer, and use the regular Earley parser with a traditional lexer, that tokenizes as an independant first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate with `lexer='standard'` +It's possible to bypass the dynamic lexing, and use the regular Earley parser with a traditional lexer, that tokenizes as an independent first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate with `lexer='standard'` **SPPF & Ambiguity resolution** diff --git a/lark/load_grammar.py b/lark/load_grammar.py index f7b1011..f6c1d22 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -90,7 +90,7 @@ TERMINALS = { '_IGNORE': r'%ignore', '_DECLARE': r'%declare', '_IMPORT': r'%import', - 'NUMBER': r'\d+', + 'NUMBER': r'[+-]?\d+', } RULES = { @@ -196,7 +196,7 @@ class EBNF_to_BNF(Transformer_InPlace): mn = mx = int(args[0]) else: mn, mx = map(int, args) - if mx < mn: + if mx < mn or mn < 0: raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx)) return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)]) assert False, op diff --git a/tests/test_parser.py b/tests/test_parser.py index 3238ead..599406f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1029,6 +1029,32 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(res.children, ['ab']) + grammar = """ + start: A B | AB + A: "a" + B.-20: "b" + AB.-10: "ab" + """ + l = _Lark(grammar) + res = l.parse("ab") + self.assertEqual(res.children, ['a', 'b']) + + + grammar = """ + start: A B | AB + A.-99999999999999999999999: "a" + B: "b" + AB: "ab" + """ + l = _Lark(grammar) + res = l.parse("ab") + + self.assertEqual(res.children, ['ab']) + + + + + def test_import(self): grammar = """