From 4d8301f73cbea06a6182e02aa120ec7c563a88db Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 4 May 2019 20:13:39 +0300 Subject: [PATCH 001/132] Version Bump --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index 6328784..7fd92ee 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.0" +__version__ = "0.7.1" From 28e571f1c68984c72b582b12c00a11168f0b9d94 Mon Sep 17 00:00:00 2001 From: Paul Vinciguerra Date: Mon, 6 May 2019 09:20:21 -0400 Subject: [PATCH 002/132] Fix DeprecationWarning in lalr_analysis.py Under python 3.3+, logging.warn is deprecated. Use logging.warning instead. Fixes: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/lark/parsers/lalr_analysis.py:87: DeprecationWarning: The 'warn' function is deprecated, use 'warning' instead --- lark/parsers/lalr_analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index cceaa45..54a4041 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -112,9 +112,9 @@ class LALR_Analyzer(GrammarAnalyzer): for k, v in lookahead.items(): if len(v) > 1: if self.debug: - logging.warn("Shift/reduce conflict for terminal %s: (resolving as shift)", k.name) + logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", k.name) for act, arg in v: - logging.warn(' * %s: %s', act, arg) + logging.warning(' * %s: %s', act, arg) for x in v: # XXX resolving shift/reduce into shift, like PLY # Give a proper warning From 09afcfcfc7b77efa7e6001641def38454afcace4 Mon Sep 17 00:00:00 2001 From: fbindel Date: Thu, 9 May 2019 14:41:41 +0200 Subject: [PATCH 003/132] Allow any graph attribute in `pydot__tree_to_png`. Keeping the explicit `rankdir="LR"` as default, add `kwargs` to `pydot__tree_to_png` and `pydot.Dot` so that all graphviz attributes are available for the graph. --- lark/tree.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lark/tree.py b/lark/tree.py index 3b845d6..fd0038e 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -141,17 +141,19 @@ class SlottedTree(Tree): __slots__ = 'data', 'children', 'rule', '_meta' -def pydot__tree_to_png(tree, filename, rankdir="LR"): +def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs): """Creates a colorful image that represents the tree (data+children, without meta) Possible values for `rankdir` are "TB", "LR", "BT", "RL", corresponding to directed graphs drawn from top to bottom, from left to right, from bottom to - top, and from right to left, respectively. See: - https://www.graphviz.org/doc/info/attrs.html#k:rankdir + top, and from right to left, respectively. + + `kwargs` can be any graph attribute (e. g. `dpi=200`). For a list of + possible attributes, see https://www.graphviz.org/doc/info/attrs.html. 
""" import pydot - graph = pydot.Dot(graph_type='digraph', rankdir=rankdir) + graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) i = [0] From 0f9dfdd6237ea093ce038fa18de3e1764b89a6b1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 11 May 2019 09:42:16 +0300 Subject: [PATCH 004/132] Re-implemented CustomLexer after regression (Issue #377) --- lark/parser_frontends.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index f81001c..ab69d01 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -65,7 +65,7 @@ class WithLexer(Serialize): inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) inst.init_lexer() return inst - + def _serialize(self, data, memo): data['parser'] = data['parser'].serialize(memo) @@ -107,11 +107,12 @@ class LALR_ContextualLexer(LALR_WithLexer): ###} class LALR_CustomLexer(LALR_WithLexer): - def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None): - pass # TODO - - def init_lexer(self): + def __init__(self, lexer_cls, lexer_conf, parser_conf, *, options=None): self.lexer = lexer_cls(self.lexer_conf) + debug = options.debug if options else False + self.parser = LALR_Parser(parser_conf, debug=debug) + WithLexer.__init__(self, lexer_conf, parser_conf, options) + def tokenize_text(text): line = 1 From e5868415ebc9f9a985549f82ff8137806d78450d Mon Sep 17 00:00:00 2001 From: Mostafa Razavi Date: Sun, 12 May 2019 19:32:50 +0200 Subject: [PATCH 005/132] Implement embedded in-place transformers. See #378. As discussed in issue #378, when an embedded transformer (that is, one passed to the Lark class using the transformer argument), is an inplace transformer (either a subclass of Transformer_InPlace, or with the @v_args(tree=True) decorator), the in-place transformer was not working correctly and in-fact Lark used it like a normal non-in-place transformer, expecting it to return the transformed value. --- lark/parse_tree_builder.py | 12 ++++++++++++ tests/test_parser.py | 27 ++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 977c371..550bc17 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -2,6 +2,7 @@ from .exceptions import GrammarError from .lexer import Token from .tree import Tree from .visitors import InlineTransformer # XXX Deprecated +from .visitors import Transformer_InPlace ###{standalone from functools import partial, wraps @@ -193,6 +194,15 @@ def ptb_inline_args(func): return func(*children) return f +def inplace_transformer(func): + @wraps(func) + def f(children): + # function name in a Transformer is a rule name. + tree = Tree(func.__name__, children) + func(tree) + return tree + return f + class ParseTreeBuilder: def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): self.tree_class = tree_class @@ -231,6 +241,8 @@ class ParseTreeBuilder: # XXX InlineTransformer is deprecated! 
if getattr(f, 'inline', False) or isinstance(transformer, InlineTransformer): f = ptb_inline_args(f) + elif hasattr(f, 'whole_tree') or isinstance(transformer, Transformer_InPlace): + f = inplace_transformer(f) except AttributeError: f = partial(self.tree_class, user_callback_name) diff --git a/tests/test_parser.py b/tests/test_parser.py index ce8b7d6..0fddf14 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -20,7 +20,7 @@ logging.basicConfig(level=logging.INFO) from lark.lark import Lark from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.tree import Tree -from lark.visitors import Transformer +from lark.visitors import Transformer, Transformer_InPlace, v_args from lark.grammar import Rule from lark.lexer import TerminalDef @@ -150,6 +150,31 @@ class TestParsers(unittest.TestCase): r = g.parse("xx") self.assertEqual( r.children, [""] ) + def test_embedded_transformer_inplace(self): + class T1(Transformer_InPlace): + def a(self, tree): + assert isinstance(tree, Tree) + tree.children.append("tested") + + @v_args(tree=True) + class T2(Transformer): + def a(self, tree): + assert isinstance(tree, Tree) + tree.children.append("tested") + + class T3(Transformer): + @v_args(tree=True) + def a(self, tree): + assert isinstance(tree, Tree) + tree.children.append("tested") + + for t in [T1(), T2(), T3()]: + g = Lark("""start: a + a : "x" + """, parser='lalr', transformer=t) + r = g.parse("x") + first, = r.children + self.assertEqual(first.children, ["tested"]) def test_alias(self): Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """) From f71df240b65c8425b6b10b4beb60fdee92f74cf6 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 12 May 2019 21:34:21 +0300 Subject: [PATCH 006/132] Removed Python2 incompatibility --- lark/parser_frontends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index ab69d01..0634814 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -107,7 +107,7 @@ class LALR_ContextualLexer(LALR_WithLexer): ###} class LALR_CustomLexer(LALR_WithLexer): - def __init__(self, lexer_cls, lexer_conf, parser_conf, *, options=None): + def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None): self.lexer = lexer_cls(self.lexer_conf) debug = options.debug if options else False self.parser = LALR_Parser(parser_conf, debug=debug) From a9106df824133f748e33544c6128cc355bc03dab Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 12 May 2019 22:11:13 +0300 Subject: [PATCH 007/132] Corrected thee Transformer's whole_tree interface, for both internal and external use --- lark/parse_tree_builder.py | 3 +-- lark/visitors.py | 2 +- tests/test_parser.py | 34 +++++++++++++++++++++++++++------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 550bc17..b54b6e8 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -199,8 +199,7 @@ def inplace_transformer(func): def f(children): # function name in a Transformer is a rule name. 
tree = Tree(func.__name__, children) - func(tree) - return tree + return func(tree) return f class ParseTreeBuilder: diff --git a/lark/visitors.py b/lark/visitors.py index 53847f9..4a0f639 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -36,7 +36,7 @@ class Transformer: return f(*children) elif getattr(f, 'whole_tree', False): if new_children is not None: - raise NotImplementedError("Doesn't work with the base Transformer class") + tree.children = new_children return f(tree) else: return f(children) diff --git a/tests/test_parser.py b/tests/test_parser.py index 0fddf14..1cf702d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -151,30 +151,50 @@ class TestParsers(unittest.TestCase): self.assertEqual( r.children, [""] ) def test_embedded_transformer_inplace(self): + @v_args(tree=True) class T1(Transformer_InPlace): def a(self, tree): - assert isinstance(tree, Tree) + assert isinstance(tree, Tree), tree tree.children.append("tested") + return tree + + def b(self, tree): + return Tree(tree.data, tree.children + ['tested2']) @v_args(tree=True) class T2(Transformer): def a(self, tree): assert isinstance(tree, Tree) tree.children.append("tested") + return tree + + def b(self, tree): + return Tree(tree.data, tree.children + ['tested2']) class T3(Transformer): @v_args(tree=True) def a(self, tree): assert isinstance(tree, Tree) tree.children.append("tested") + return tree + + @v_args(tree=True) + def b(self, tree): + return Tree(tree.data, tree.children + ['tested2']) for t in [T1(), T2(), T3()]: - g = Lark("""start: a - a : "x" - """, parser='lalr', transformer=t) - r = g.parse("x") - first, = r.children - self.assertEqual(first.children, ["tested"]) + for internal in [False, True]: + g = Lark("""start: a b + a : "x" + b : "y" + """, parser='lalr', transformer=t if internal else None) + r = g.parse("xy") + if not internal: + r = t.transform(r) + + a, b = r.children + self.assertEqual(a.children, ["tested"]) + self.assertEqual(b.children, ["tested2"]) def test_alias(self): Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """) From e79689dce7eabd4fcaaedc1d50927725e76a53a4 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 19 May 2019 13:30:25 +0300 Subject: [PATCH 008/132] Remove unused rules (Issue #384) --- lark/lark.py | 2 +- lark/load_grammar.py | 16 ++++++++++++++-- lark/reconstruct.py | 2 +- tests/test_parser.py | 13 +++++++++++++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 9bb49a3..87f7137 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -200,7 +200,7 @@ class Lark(Serialize): self.grammar = load_grammar(grammar, self.source) # Compile the EBNF grammar into BNF - self.terminals, self.rules, self.ignore_tokens = self.grammar.compile() + self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) # If the user asked to invert the priorities, negate them all here. # This replaces the old 'resolve__antiscore_sum' option. 
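Passing `self.options.start` into `Grammar.compile` lets the grammar compiler drop rules that are unreachable from the start symbol; the load_grammar.py hunk below implements this as a fixed-point filter. A minimal, self-contained sketch of the same idea, using plain tuples instead of Lark's Rule objects (the names here are illustrative, not Lark's API):

```python
def filter_unused_rules(rules, start):
    """rules: list of (origin, expansion) pairs, where expansion is a list of rule names."""
    while True:
        used = {sym for origin, expansion in rules
                    for sym in expansion
                    if sym != origin}           # symbols referenced by a rule other than themselves
        kept = [(o, e) for o, e in rules if o == start or o in used]
        if len(kept) == len(rules):             # fixed point reached: nothing more to drop
            return kept
        rules = kept

# e.g. filter_unused_rules([('start', ['x']), ('x', []), ('unused', ['x'])], 'start')
# keeps ('start', ...) and ('x', ...); the 'unused' rule is removed.
```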
diff --git a/lark/load_grammar.py b/lark/load_grammar.py index bfd8585..281dc5b 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -205,7 +205,7 @@ class EBNF_to_BNF(Transformer_InPlace): keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens def will_not_get_removed(sym): - if isinstance(sym, NonTerminal): + if isinstance(sym, NonTerminal): return not sym.name.startswith('_') if isinstance(sym, Terminal): return keep_all_tokens or not sym.filter_out @@ -465,7 +465,7 @@ class Grammar: self.rule_defs = rule_defs self.ignore = ignore - def compile(self): + def compile(self, start): # We change the trees in-place (to support huge grammars) # So deepcopy allows calling compile more than once. term_defs = deepcopy(list(self.term_defs)) @@ -546,6 +546,18 @@ class Grammar: # Remove duplicates compiled_rules = list(set(compiled_rules)) + + # Filter out unused rules + while True: + c = len(compiled_rules) + used_rules = {s for r in compiled_rules + for s in r.expansion + if isinstance(s, NonTerminal) + and s != r.origin} + compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules] + if len(compiled_rules) == c: + break + # Filter out unused terminals used_terms = {t.name for r in compiled_rules for t in r.expansion diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 2800840..c446913 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -69,7 +69,7 @@ class MakeMatchTree: class Reconstructor: def __init__(self, parser): # XXX TODO calling compile twice returns different results! - tokens, rules, _grammar_extra = parser.grammar.compile() + tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) self.rules = list(self._build_recons_rules(rules)) diff --git a/tests/test_parser.py b/tests/test_parser.py index 1cf702d..d582878 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1493,6 +1493,19 @@ def _make_parser_test(LEXER, PARSER): parser.parse(r'"That" "And a \"b"') + + def test_meddling_unused(self): + "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision" + + grammar = """ + start: EKS* x + x: EKS + unused: x* + EKS: "x" + """ + parser = _Lark(grammar) + + @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)") def test_serialize(self): grammar = """ From 7b43742afd2c4953f34ad68f7e6aae417c24b22f Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 23 May 2019 01:05:07 +0300 Subject: [PATCH 009/132] Fixed IMAG_NUMBER in the Python3 grammar (Issue #387) --- examples/python3.lark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python3.lark b/examples/python3.lark index bfd2a4c..3f39f9f 100644 --- a/examples/python3.lark +++ b/examples/python3.lark @@ -178,7 +178,7 @@ HEX_NUMBER.2: /0x[\da-f]*/i OCT_NUMBER.2: /0o[0-7]*/i BIN_NUMBER.2 : /0b[0-1]*/i FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i -IMAG_NUMBER.2: /\d+j|${FLOAT_NUMBER}j/i +IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i %ignore /[\t \f]+/ // WS %ignore /\\[\t \f]*\r?\n/ // LINE_CONT From 8e81dc00619c594922cf305f9d56c54cd5c15275 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 27 May 2019 14:40:11 +0300 Subject: [PATCH 010/132] Update "Projects using Lark" --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bc48b48..b9a07cf 100644 --- a/README.md +++ 
b/README.md @@ -132,9 +132,17 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail ### Projects using Lark + - [storyscript](https://github.com/storyscript/storyscript) - The programming language for Application Storytelling + - [tartiflette](https://github.com/dailymotion/tartiflette) - a GraphQL engine by Dailymotion. Lark is used to parse the GraphQL schemas definitions. - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration + - [synapse](https://github.com/vertexproject/synapse) - an intelligence analysis platform + - [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands + - [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks + - [https://github.com/aiqm/torchani](Accurate Neural Network Potential on PyTorch) - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer - - [tartiflette](https://github.com/dailymotion/tartiflette) - a GraphQL engine by Dailymotion (Lark is used to parse the GraphQL schemas definitions) + - [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings + - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language + Using Lark? Send me a message and I'll add your project! From 335be9d289ebbc6c94adc634e9f7a15ab611942f Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 27 May 2019 14:44:40 +0300 Subject: [PATCH 011/132] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b9a07cf..975b9a4 100644 --- a/README.md +++ b/README.md @@ -134,14 +134,15 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail - [storyscript](https://github.com/storyscript/storyscript) - The programming language for Application Storytelling - [tartiflette](https://github.com/dailymotion/tartiflette) - a GraphQL engine by Dailymotion. Lark is used to parse the GraphQL schemas definitions. + - [Hypothesis](https://github.com/HypothesisWorks/hypothesis) - Library for property-based testing - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration - [synapse](https://github.com/vertexproject/synapse) - an intelligence analysis platform - [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands - [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks - - [https://github.com/aiqm/torchani](Accurate Neural Network Potential on PyTorch) - - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer + - [Torchani](https://github.com/aiqm/torchani) - Accurate Neural Network Potential on PyTorch - [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language + - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer Using Lark? Send me a message and I'll add your project! 
From 9a64d2124be6affe6664ef222e484b95a69aa808 Mon Sep 17 00:00:00 2001 From: David Kemp <19152940+davaya@users.noreply.github.com> Date: Wed, 29 May 2019 13:21:33 -0400 Subject: [PATCH 012/132] Clarify handling of filtered terminals --- docs/tree_construction.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/tree_construction.md b/docs/tree_construction.md index 47deab2..f80c743 100644 --- a/docs/tree_construction.md +++ b/docs/tree_construction.md @@ -22,6 +22,23 @@ Lark filters out certain types of terminals by default, considering them punctua - Unnamed regular expressions (like `/[0-9]/`) - Named terminals whose name starts with a letter (like `DIGIT`) + - All terminals concatenated within a terminal + +**Example:** +``` +start: PNAME pname + +PNAME: "(" NAME ")" +pname: "(" NAME ")" + +NAME: /\w+/ +%ignore /\s+/ +``` +Lark will parse "(Hello) (World)" as: + + start + (Hello) + pname World Rules prefixed with `!` will retain all their literals regardless. From 6a14e25f407b81490d5dc4a5a701a23c386e652d Mon Sep 17 00:00:00 2001 From: David Kemp <19152940+davaya@users.noreply.github.com> Date: Wed, 29 May 2019 15:05:10 -0400 Subject: [PATCH 013/132] Update description of terminals --- docs/tree_construction.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tree_construction.md b/docs/tree_construction.md index f80c743..6b581e0 100644 --- a/docs/tree_construction.md +++ b/docs/tree_construction.md @@ -22,7 +22,8 @@ Lark filters out certain types of terminals by default, considering them punctua - Unnamed regular expressions (like `/[0-9]/`) - Named terminals whose name starts with a letter (like `DIGIT`) - - All terminals concatenated within a terminal + +Note: Terminals composed of literals and other terminals always include the entire match without filtering any part. **Example:** ``` From f814d91f9dc0d9989a9ef413cca0f2622427eb74 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 11 Jun 2019 11:09:16 +0300 Subject: [PATCH 014/132] Removed possibly problematic code (Issue #372) --- lark/load_grammar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 281dc5b..8bda118 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -520,7 +520,7 @@ class Grammar: if alias and name.startswith('_'): raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) - empty_indices = [x==_EMPTY for i, x in enumerate(expansion)] + empty_indices = [x==_EMPTY for x in expansion] if any(empty_indices): exp_options = copy(options) if options else RuleOptions() exp_options.empty_indices = empty_indices From 39a17f1d56fe067d5afd72c114bedd9cbb7eba20 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 21 Jun 2019 10:04:57 +0300 Subject: [PATCH 015/132] Fixed broken link in docs (Issue #399) --- docs/how_to_use.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how_to_use.md b/docs/how_to_use.md index c2987df..886b440 100644 --- a/docs/how_to_use.md +++ b/docs/how_to_use.md @@ -10,7 +10,7 @@ This is the recommended process for working with Lark: 3. Try your grammar in Lark against each input sample. Make sure the resulting parse-trees make sense. -4. Use Lark's grammar features to [[shape the tree|Tree Construction]]: Get rid of superfluous rules by inlining them, and use aliases when specific cases need clarification. +4. 
Use Lark's grammar features to [shape the tree](tree_construction.md): Get rid of superfluous rules by inlining them, and use aliases when specific cases need clarification. - You can perform steps 1-4 repeatedly, gradually growing your grammar to include more sentences. From f1e844accdb0dc544e3c92d1571d0c9a7e832765 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 1 Jul 2019 17:07:23 +0300 Subject: [PATCH 016/132] Mid work. Not promising --- lark/common.py | 1 + lark/exceptions.py | 4 +++- lark/lark.py | 3 +++ lark/lexer.py | 4 +++- lark/load_grammar.py | 5 +++-- lark/parsers/cyk.py | 2 +- lark/parsers/earley.py | 2 +- lark/parsers/grammar_analysis.py | 2 +- lark/parsers/lalr_analysis.py | 16 +++++++--------- lark/parsers/lalr_parser.py | 6 +++--- tests/test_parser.py | 9 +++++++++ 11 files changed, 35 insertions(+), 19 deletions(-) diff --git a/lark/common.py b/lark/common.py index e1ec220..7103d14 100644 --- a/lark/common.py +++ b/lark/common.py @@ -20,6 +20,7 @@ class LexerConf(Serialize): class ParserConf: def __init__(self, rules, callbacks, start): + assert isinstance(start, list) self.rules = rules self.callbacks = callbacks self.start = start diff --git a/lark/exceptions.py b/lark/exceptions.py index f781968..4207589 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -52,7 +52,7 @@ class UnexpectedInput(LarkError): class UnexpectedCharacters(LexError, UnexpectedInput): - def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None): + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) self.line = line @@ -65,6 +65,8 @@ class UnexpectedCharacters(LexError, UnexpectedInput): message += '\n\n' + self.get_context(seq) if allowed: message += '\nExpecting: %s\n' % allowed + if token_history: + message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history) super(UnexpectedCharacters, self).__init__(message) diff --git a/lark/lark.py b/lark/lark.py index 87f7137..e096c55 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -85,6 +85,9 @@ class LarkOptions(Serialize): options[name] = value + if isinstance(options['start'], str): + options['start'] = [options['start']] + self.__dict__['options'] = options assert self.parser in ('earley', 'lalr', 'cyk', None) diff --git a/lark/lexer.py b/lark/lexer.py index bdf635d..3e881f8 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -149,6 +149,7 @@ class _Lex: newline_types = frozenset(newline_types) ignore_types = frozenset(ignore_types) line_ctr = LineCounter() + last_token = None while line_ctr.char_pos < len(stream): lexer = self.lexer @@ -166,6 +167,7 @@ class _Lex: t = lexer.callback[t.type](t) if not isinstance(t, Token): raise ValueError("Callbacks must return a token (returned %r)" % t) + last_token = t yield t else: if type_ in lexer.callback: @@ -180,7 +182,7 @@ class _Lex: break else: allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state) + raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token]) class UnlessCallback: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 8bda118..f7b1011 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -554,7 +554,8 @@ class Grammar: 
for s in r.expansion if isinstance(s, NonTerminal) and s != r.origin} - compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules] + used_rules |= {NonTerminal(s) for s in start} + compiled_rules = [r for r in compiled_rules if r.origin in used_rules] if len(compiled_rules) == c: break @@ -690,7 +691,7 @@ class GrammarLoader: callback = ParseTreeBuilder(rules, ST).create_callback() lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) - parser_conf = ParserConf(rules, callback, 'start') + parser_conf = ParserConf(rules, callback, ['start']) self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) self.canonize_tree = CanonizeTree() diff --git a/lark/parsers/cyk.py b/lark/parsers/cyk.py index 2121449..52584a7 100644 --- a/lark/parsers/cyk.py +++ b/lark/parsers/cyk.py @@ -89,7 +89,7 @@ class Parser(object): self.orig_rules = {rule: rule for rule in rules} rules = [self._to_rule(rule) for rule in rules] self.grammar = to_cnf(Grammar(rules)) - self.start = NT(start) + self.start = NT(start[0]) def _to_rule(self, lark_rule): """Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 0518174..3cd0193 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -274,7 +274,7 @@ class Parser: assert i == len(columns)-1 def parse(self, stream, start_symbol=None): - start_symbol = NonTerminal(start_symbol or self.parser_conf.start) + start_symbol = NonTerminal(start_symbol or self.parser_conf.start[0]) columns = [set()] to_scan = set() # The scan buffer. 'Q' in E.Scott's paper. diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 732496c..bdfd92f 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -109,7 +109,7 @@ class GrammarAnalyzer(object): def __init__(self, parser_conf, debug=False): self.debug = debug - rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])] + rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(s), Terminal('$END')]) for s in parser_conf.start] self.rules_by_origin = classify(rules, lambda r: r.origin) if len(rules) != len(set(rules)): diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 54a4041..76e44c7 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -29,10 +29,10 @@ Shift = Action('Shift') Reduce = Action('Reduce') class ParseTable: - def __init__(self, states, start_state, end_state): + def __init__(self, states, start_state, end_states): self.states = states self.start_state = start_state - self.end_state = end_state + self.end_states = end_states def serialize(self, memo): tokens = Enumerator() @@ -48,7 +48,7 @@ class ParseTable: 'tokens': tokens.reversed(), 'states': states, 'start_state': self.start_state, - 'end_state': self.end_state, + 'end_states': self.end_states, } @classmethod @@ -59,7 +59,7 @@ class ParseTable: for token, (action, arg) in actions.items()} for state, actions in data['states'].items() } - return cls(states, data['start_state'], data['end_state']) + return cls(states, data['start_state'], data['end_states']) class IntParseTable(ParseTable): @@ -77,8 +77,8 @@ class IntParseTable(ParseTable): start_state = state_to_idx[parse_table.start_state] - end_state = state_to_idx[parse_table.end_state] - return cls(int_states, start_state, end_state) + end_states = [state_to_idx[s] for s in parse_table.end_states] + return cls(int_states, 
start_state, end_states) ###} @@ -130,9 +130,7 @@ class LALR_Analyzer(GrammarAnalyzer): for _ in bfs([self.start_state], step): pass - self.end_state ,= self.end_states - - self._parse_table = ParseTable(self.states, self.start_state, self.end_state) + self._parse_table = ParseTable(self.states, self.start_state, self.end_states) if self.debug: self.parse_table = self._parse_table diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index aea75ca..1d56f5e 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -40,7 +40,7 @@ class _Parser: def __init__(self, parse_table, callbacks): self.states = parse_table.states self.start_state = parse_table.start_state - self.end_state = parse_table.end_state + self.end_states = parse_table.end_states self.callbacks = callbacks def parse(self, seq, set_state=None): @@ -81,7 +81,7 @@ class _Parser: for token in stream: while True: action, arg = get_action(token) - assert arg != self.end_state + assert arg not in self.end_states if action is Shift: state_stack.append(arg) @@ -95,7 +95,7 @@ class _Parser: while True: _action, arg = get_action(token) if _action is Shift: - assert arg == self.end_state + assert arg in self.end_states val ,= value_stack return val else: diff --git a/tests/test_parser.py b/tests/test_parser.py index d582878..bc8388c 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1523,6 +1523,15 @@ def _make_parser_test(LEXER, PARSER): parser3 = Lark.deserialize(d, namespace, m) self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) ) + def test_multi_start(self): + parser = _Lark(''' + a: "x" + b: "x" "b"? + ''', start=['a', 'b']) + + # parser.parse('acab') + # parser.parse('bcab') + _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() From 71fe87964a547ed56cb7af8befdcb9d863d6bb74 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 1 Jul 2019 17:39:10 +0300 Subject: [PATCH 017/132] Small refactor in grammar analysis --- lark/parsers/grammar_analysis.py | 8 ++++++-- lark/parsers/lalr_analysis.py | 5 ----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 732496c..ab84efb 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -109,7 +109,8 @@ class GrammarAnalyzer(object): def __init__(self, parser_conf, debug=False): self.debug = debug - rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])] + root_rule = Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')]) + rules = parser_conf.rules + [root_rule] self.rules_by_origin = classify(rules, lambda r: r.origin) if len(rules) != len(set(rules)): @@ -121,7 +122,10 @@ class GrammarAnalyzer(object): if not (sym.is_term or sym in self.rules_by_origin): raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation - self.start_state = self.expand_rule(NonTerminal('$root')) + self.start_state = self.expand_rule(root_rule.origin) + + end_rule = RulePtr(root_rule, len(root_rule.expansion)) + self.end_state = fzset({end_rule}) self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 54a4041..ee2f75c 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -85,7 +85,6 @@ class IntParseTable(ParseTable): class LALR_Analyzer(GrammarAnalyzer): def compute_lookahead(self): - self.end_states = [] self.states = {} 
def step(state): @@ -105,8 +104,6 @@ class LALR_Analyzer(GrammarAnalyzer): new_state = fzset(rps) lookahead[sym].append((Shift, new_state)) - if sym == Terminal('$END'): - self.end_states.append( new_state ) yield new_state for k, v in lookahead.items(): @@ -130,8 +127,6 @@ class LALR_Analyzer(GrammarAnalyzer): for _ in bfs([self.start_state], step): pass - self.end_state ,= self.end_states - self._parse_table = ParseTable(self.states, self.start_state, self.end_state) if self.debug: From bcc4e67bceea22635b286c852f846ad4d324f01a Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Mon, 1 Jul 2019 18:30:25 +0300 Subject: [PATCH 018/132] CYK also working --- lark/parser_frontends.py | 4 ++-- lark/parsers/cyk.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 1b55fe1..c1bb3c9 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -194,13 +194,13 @@ class CYK(WithLexer): self.init_traditional_lexer() self._analysis = GrammarAnalyzer(parser_conf) - self._parser = cyk.Parser(parser_conf.rules, parser_conf.start) + self.parser = cyk.Parser(parser_conf.rules) self.callbacks = parser_conf.callbacks def parse(self, text, start): tokens = list(self.lex(text)) - parse = self._parser.parse(tokens) + parse = self._parse(tokens, start) parse = self._transform(parse) return parse diff --git a/lark/parsers/cyk.py b/lark/parsers/cyk.py index 52584a7..7b25609 100644 --- a/lark/parsers/cyk.py +++ b/lark/parsers/cyk.py @@ -84,12 +84,11 @@ class RuleNode(object): class Parser(object): """Parser wrapper.""" - def __init__(self, rules, start): + def __init__(self, rules): super(Parser, self).__init__() self.orig_rules = {rule: rule for rule in rules} rules = [self._to_rule(rule) for rule in rules] self.grammar = to_cnf(Grammar(rules)) - self.start = NT(start[0]) def _to_rule(self, lark_rule): """Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" @@ -100,13 +99,16 @@ class Parser(object): weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, alias=lark_rule) - def parse(self, tokenized): # pylint: disable=invalid-name + def parse(self, tokenized, start): # pylint: disable=invalid-name """Parses input, which is a list of tokens.""" + assert start + start = NT(start) + table, trees = _parse(tokenized, self.grammar) # Check if the parse succeeded. 
- if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]): + if all(r.lhs != start for r in table[(0, len(tokenized) - 1)]): raise ParseError('Parsing failed.') - parse = trees[(0, len(tokenized) - 1)][self.start] + parse = trees[(0, len(tokenized) - 1)][start] return self._to_tree(revert_cnf(parse)) def _to_tree(self, rule_node): From be2e860c83eb6c0ee30f6f8cb8063373e2989067 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 24 Dec 2018 16:11:46 +0200 Subject: [PATCH 019/132] Added to tests: Make sure the standalone parser is reusable --- tests/test_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_tools.py b/tests/test_tools.py index ff823ec..27927eb 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -49,6 +49,8 @@ class TestStandalone(TestCase): l = _Lark() x = l.parse('12 elephants') self.assertEqual(x.children, ['12', 'elephants']) + x = l.parse('16 candles') + self.assertEqual(x.children, ['16', 'candles']) def test_contextual(self): grammar = """ From 505c46e9ba30f125f457654422f6521121a4c5f1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 2 Jul 2019 18:31:22 +0300 Subject: [PATCH 020/132] Cleaned up a test --- tests/test_tools.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tests/test_tools.py b/tests/test_tools.py index 27927eb..5316396 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,11 +1,9 @@ from __future__ import absolute_import import sys -import unittest -from unittest import TestCase +from unittest import TestCase, main from lark.tree import Tree - from lark.tools import standalone try: @@ -94,26 +92,19 @@ class TestStandalone(TestCase): _NEWLINE: /\n/ """ - # from lark import Lark - # l = Lark(grammar, parser='lalr', lexer='contextual', postlex=MyIndenter()) - # x = l.parse('(\n)\n') - # print('@@', x) - - context = self._create_standalone(grammar) _Lark = context['Lark_StandAlone'] - # l = _Lark(postlex=MyIndenter()) - # x = l.parse('()\n') - # print(x) + l = _Lark(postlex=MyIndenter()) + x = l.parse('()\n') + self.assertEqual(x, Tree('start', [])) l = _Lark(postlex=MyIndenter()) x = l.parse('(\n)\n') - print(x) - + self.assertEqual(x, Tree('start', [])) if __name__ == '__main__': - unittest.main() + main() From 94e7e82a199d846650090d7f46562ed9c1f10692 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 9 Jul 2019 16:57:30 +0300 Subject: [PATCH 021/132] Updated docstrings for multiple start symbols --- lark/lark.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 82cf76a..5c43fa8 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -43,7 +43,7 @@ class LarkOptions(Serialize): keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) cache_grammar - Cache the Lark grammar (Default: False) postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. - start - The start symbol (Default: start) + start - The start symbol, either a string, or a list of strings for multiple possible starts (Default: "start") profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False) priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. @@ -291,7 +291,12 @@ class Lark(Serialize): return stream def parse(self, text, start=None): - "Parse the given text, according to the options provided. 
Returns a tree, unless specified otherwise." + """Parse the given text, according to the options provided. + + The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option). + + Returns a tree, unless specified otherwise. + """ return self.parser.parse(text, start=start) ###} From e3cbd7aadc26faa69b1cda1253871dca3f1c665f Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 21 Jul 2019 00:50:28 +0200 Subject: [PATCH 022/132] Negative priority now allowed in rules and tokens. Updated docs about priority --- docs/grammar.md | 13 ++++++++++++- docs/parsers.md | 2 +- lark/load_grammar.py | 4 ++-- tests/test_parser.py | 26 ++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/docs/grammar.md b/docs/grammar.md index ad70f6e..9343ee4 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -45,6 +45,12 @@ Literals can be one of: * `/re with flags/imulx` * Literal range: `"a".."z"`, `"1".."9"`, etc. +### Priority + +Terminals can be assigned priority only when using a lexer (future versions may support Earley's dynamic lexing). + +Priority can be either positive or negative. In not specified for a terminal, it's assumed to be 1 (i.e. the default). + #### Notes for when using a lexer: When using a lexer (standard or contextual), it is the grammar-author's responsibility to make sure the literals don't collide, or that if they do, they are matched in the desired order. Literals are matched in an order according to the following criteria: @@ -90,7 +96,7 @@ Each item is one of: * `item*` - Zero or more instances of item * `item+` - One or more instances of item * `item ~ n` - Exactly *n* instances of item -* `item ~ n..m` - Between *n* to *m* instances of item +* `item ~ n..m` - Between *n* to *m* instances of item (not recommended for wide ranges, due to performance issues) **Examples:** ```perl @@ -102,6 +108,11 @@ expr: expr operator expr four_words: word ~ 4 ``` +### Priority + +Rules can be assigned priority only when using Earley (future versions may support LALR as well). + +Priority can be either positive or negative. In not specified for a terminal, it's assumed to be 1 (i.e. the default). ## Directives diff --git a/docs/parsers.md b/docs/parsers.md index 35de223..fb7c997 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -7,7 +7,7 @@ An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser Lark's Earley implementation runs on top of a skipping chart parser, which allows it to use regular expressions, instead of matching characters one-by-one. This is a huge improvement to Earley that is unique to Lark. This feature is used by default, but can also be requested explicitely using `lexer='dynamic'`. -It's possible to bypass the dynamic lexer, and use the regular Earley parser with a traditional lexer, that tokenizes as an independant first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate with `lexer='standard'` +It's possible to bypass the dynamic lexing, and use the regular Earley parser with a traditional lexer, that tokenizes as an independant first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! 
Activate with `lexer='standard'` **SPPF & Ambiguity resolution** diff --git a/lark/load_grammar.py b/lark/load_grammar.py index f7b1011..f6c1d22 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -90,7 +90,7 @@ TERMINALS = { '_IGNORE': r'%ignore', '_DECLARE': r'%declare', '_IMPORT': r'%import', - 'NUMBER': r'\d+', + 'NUMBER': r'[+-]?\d+', } RULES = { @@ -196,7 +196,7 @@ class EBNF_to_BNF(Transformer_InPlace): mn = mx = int(args[0]) else: mn, mx = map(int, args) - if mx < mn: + if mx < mn or mn < 0: raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx)) return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)]) assert False, op diff --git a/tests/test_parser.py b/tests/test_parser.py index 3238ead..599406f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1029,6 +1029,32 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(res.children, ['ab']) + grammar = """ + start: A B | AB + A: "a" + B.-20: "b" + AB.-10: "ab" + """ + l = _Lark(grammar) + res = l.parse("ab") + self.assertEqual(res.children, ['a', 'b']) + + + grammar = """ + start: A B | AB + A.-99999999999999999999999: "a" + B: "b" + AB: "ab" + """ + l = _Lark(grammar) + res = l.parse("ab") + + self.assertEqual(res.children, ['ab']) + + + + + def test_import(self): grammar = """ From c87cbc63225fb6a0426d7b5b8f2bfbecd978eda1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 21 Jul 2019 00:59:50 +0200 Subject: [PATCH 023/132] Removed some dead code --- lark/parsers/grammar_analysis.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 086349c..a31f308 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -132,7 +132,7 @@ class GrammarAnalyzer(object): self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) - def expand_rule(self, rule): + def expand_rule(self, source_rule): "Returns all init_ptrs accessible by rule (recursive)" init_ptrs = set() def _expand_rule(rule): @@ -147,14 +147,7 @@ class GrammarAnalyzer(object): if not new_r.is_term: yield new_r - for _ in bfs([rule], _expand_rule): + for _ in bfs([source_rule], _expand_rule): pass - return fzset(init_ptrs) - - def _first(self, r): - if r.is_term: - return {r} - else: - return {rp.next for rp in self.expand_rule(r) if rp.next.is_term} - + return fzset(init_ptrs) \ No newline at end of file From 39b0d769141d3f8e579b5a4711bd22b579657801 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 21 Jul 2019 09:46:08 +0200 Subject: [PATCH 024/132] Minor optimization in LALR (thanks to @Raekye) --- lark/parsers/grammar_analysis.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index a31f308..8fc0806 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -82,9 +82,10 @@ def calculate_sets(rules): changed = True for i, sym in enumerate(rule.expansion): - if set(rule.expansion[:i]) <= NULLABLE: - if update_set(FIRST[rule.origin], FIRST[sym]): - changed = True + if set(rule.expansion[:i]) > NULLABLE: + break + if update_set(FIRST[rule.origin], FIRST[sym]): + changed = True # Calculate FOLLOW changed = True From 8e9da6a6d6e2d6395211003ed880b712a2304779 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 21 Jul 2019 09:51:27 +0200 Subject: [PATCH 025/132] Minor optimization in LALR (and fix for last commit) --- lark/parsers/grammar_analysis.py | 9 
+++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 8fc0806..306059d 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -38,7 +38,7 @@ class RulePtr(object): def update_set(set1, set2): - if not set2: + if not set2 or set1 > set2: return False copy = set(set1) @@ -82,10 +82,11 @@ def calculate_sets(rules): changed = True for i, sym in enumerate(rule.expansion): - if set(rule.expansion[:i]) > NULLABLE: + if set(rule.expansion[:i]) <= NULLABLE: + if update_set(FIRST[rule.origin], FIRST[sym]): + changed = True + else: break - if update_set(FIRST[rule.origin], FIRST[sym]): - changed = True # Calculate FOLLOW changed = True From d952f2a0694787ab8e67256574710cb56a2f4c26 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 24 Jul 2019 12:27:34 +0200 Subject: [PATCH 026/132] Token values are now always unicode (resolves issue #411) --- lark/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index 3e881f8..898ee04 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -88,7 +88,7 @@ class Token(Str): self.type = type_ self.pos_in_stream = pos_in_stream - self.value = value + self.value = Str(value) self.line = line self.column = column self.end_line = end_line From 0d164bd344cf164954b52c6fc50f4ddcd23a2cfd Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 24 Jul 2019 23:00:55 +0200 Subject: [PATCH 027/132] Added get_terminal() method (Issue #412) --- lark/lark.py | 6 ++++++ lark/lexer.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/lark/lark.py b/lark/lark.py index 5c43fa8..ae71d56 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -205,6 +205,8 @@ class Lark(Serialize): # Compile the EBNF grammar into BNF self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) + self._terminals_dict = {t.name:t for t in self.terminals} + # If the user asked to invert the priorities, negate them all here. # This replaces the old 'resolve__antiscore_sum' option. if self.options.priority == 'invert': @@ -290,6 +292,10 @@ class Lark(Serialize): return self.options.postlex.process(stream) return stream + def get_terminal(self, name): + "Get information about a terminal" + return self._terminals_dict[name] + def parse(self, text, start=None): """Parse the given text, according to the options provided. 
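A quick usage sketch for the `get_terminal` helper added in the hunk above (the grammar here is illustrative):

```python
from lark import Lark

parser = Lark(r'''
    start: NUMBER "+" NUMBER
    NUMBER: /\d+/
    %ignore " "
''')

num = parser.get_terminal('NUMBER')        # the TerminalDef for NUMBER
print(num.name, num.pattern.to_regexp())   # prints something like: NUMBER \d+
```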
diff --git a/lark/lexer.py b/lark/lexer.py index 898ee04..4a8b422 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -41,6 +41,8 @@ class Pattern(Serialize): class PatternStr(Pattern): + type = "str" + def to_regexp(self): return self._get_flags(re.escape(self.value)) @@ -50,6 +52,8 @@ class PatternStr(Pattern): max_width = min_width class PatternRE(Pattern): + type = "re" + def to_regexp(self): return self._get_flags(self.value) From 7add0e1f3f561f8701a745f0e1d05d71da82752d Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 25 Jul 2019 09:56:20 +0200 Subject: [PATCH 028/132] Memoize get_regexp_width (Issue #413) --- lark/lexer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 4a8b422..d3e4af6 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -57,12 +57,18 @@ class PatternRE(Pattern): def to_regexp(self): return self._get_flags(self.value) + _width = None + def _get_width(self): + if self._width is None: + self._width = get_regexp_width(self.to_regexp()) + return self._width + @property def min_width(self): - return get_regexp_width(self.to_regexp())[0] + return self._get_width()[0] @property def max_width(self): - return get_regexp_width(self.to_regexp())[1] + return self._get_width()[1] class TerminalDef(Serialize): From dd3a812fead3fc1f2a45d45d337fb7abaabf63b1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 30 Jul 2019 11:31:28 +0200 Subject: [PATCH 029/132] Version bump (0.7.2) --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index 7fd92ee..db2ce44 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.1" +__version__ = "0.7.2" From 21c41e54a9728587bc2043d187b9230acad9fcec Mon Sep 17 00:00:00 2001 From: Raekye Date: Tue, 30 Jul 2019 19:49:23 -0400 Subject: [PATCH 030/132] lalr parser --- lark/parsers/grammar_analysis.py | 36 ++++++- lark/parsers/lalr_analysis.py | 171 +++++++++++++++++++++++++------ lark/parsers/lalr_parser.py | 21 ++-- 3 files changed, 184 insertions(+), 44 deletions(-) diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 086349c..5a4d0e8 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -36,6 +36,23 @@ class RulePtr(object): def __hash__(self): return hash((self.rule, self.index)) +class LR0ItemSet(object): + __slots__ = ('kernel', 'closure', 'transitions') + + def __init__(self, kernel, closure): + self.kernel = fzset(kernel) + self.closure = fzset(closure) + self.transitions = {} + + def __eq__(self, other): + return self.kernel == other.kernel + + def __hash__(self): + return hash(self.kernel) + + def __repr__(self): + return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) + def update_set(set1, set2): if not set2: @@ -130,15 +147,29 @@ class GrammarAnalyzer(object): self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))}) for start, root_rule in root_rules.items()} + lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)]) + for start in parser_conf.start} + + lr0_rules = parser_conf.rules + list(lr0_root_rules.values()) + + self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) + + self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], 
self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) + for start, root_rule in lr0_root_rules.items()} + self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) - def expand_rule(self, rule): + def expand_rule(self, rule, rules_by_origin=None): "Returns all init_ptrs accessible by rule (recursive)" + + if rules_by_origin is None: + rules_by_origin = self.rules_by_origin + init_ptrs = set() def _expand_rule(rule): assert not rule.is_term, rule - for r in self.rules_by_origin[rule]: + for r in rules_by_origin[rule]: init_ptr = RulePtr(r, 0) init_ptrs.add(init_ptr) @@ -157,4 +188,3 @@ class GrammarAnalyzer(object): return {r} else: return {rp.next for rp in self.expand_rule(r) if rp.next.is_term} - diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index eef1f9b..61fe692 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -12,7 +12,7 @@ from collections import defaultdict from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator from ..exceptions import GrammarError -from .grammar_analysis import GrammarAnalyzer, Terminal +from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet from ..grammar import Rule ###{standalone @@ -84,53 +84,158 @@ class IntParseTable(ParseTable): class LALR_Analyzer(GrammarAnalyzer): - def compute_lookahead(self): + def generate_lr0_states(self): + self.states = set() - self.states = {} def step(state): - lookahead = defaultdict(list) - sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied) - for rp in sat: - for term in self.FOLLOW.get(rp.rule.origin, ()): - lookahead[term].append((Reduce, rp.rule)) + _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied) d = classify(unsat, lambda rp: rp.next) for sym, rps in d.items(): - rps = {rp.advance(sym) for rp in rps} + kernel = {rp.advance(sym) for rp in rps} + closure = set(kernel) - for rp in set(rps): + for rp in kernel: if not rp.is_satisfied and not rp.next.is_term: - rps |= self.expand_rule(rp.next) + closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin) - new_state = fzset(rps) - lookahead[sym].append((Shift, new_state)) + new_state = LR0ItemSet(kernel, closure) + state.transitions[sym] = new_state yield new_state - for k, v in lookahead.items(): - if len(v) > 1: - if self.debug: - logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", k.name) - for act, arg in v: - logging.warning(' * %s: %s', act, arg) - for x in v: - # XXX resolving shift/reduce into shift, like PLY - # Give a proper warning - if x[0] is Shift: - lookahead[k] = [x] - - for k, v in lookahead.items(): - if not len(v) == 1: - raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v]))) - - self.states[state] = {k.name:v[0] for k, v in lookahead.items()} - - for _ in bfs(self.start_states.values(), step): + self.states.add(state) + + for _ in bfs(self.lr0_start_states.values(), step): pass - self._parse_table = ParseTable(self.states, self.start_states, self.end_states) + def discover_lookaheads(self): + # state -> rule -> set of lookaheads + self.lookaheads = defaultdict(lambda: defaultdict(set)) + # state -> rule -> list of (set of lookaheads) to propagate to + self.propagates = defaultdict(lambda: defaultdict(list)) + + for s in self.lr0_start_states.values(): + for rp in s.kernel: + self.lookaheads[s][rp].add(Terminal('$END')) + + # There is a 1 to 1 correspondance between LR0 and LALR1 states. 
+ # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items. + # use a terminal that does not exist in the grammar + t = Terminal('$#') + for s in self.states: + for rp in s.kernel: + for rp2, la in self.generate_lr1_closure([(rp, t)]): + if rp2.is_satisfied: + continue + next_symbol = rp2.next + next_state = s.transitions[next_symbol] + rp3 = rp2.advance(next_symbol) + assert(rp3 in next_state.kernel) + x = self.lookaheads[next_state][rp3] + if la == t: + # we must propagate rp's lookaheads to rp3's lookahead set + self.propagates[s][rp].append(x) + else: + # this lookahead is "generated spontaneously" for rp3 + x.add(la) + + def propagate_lookaheads(self): + changed = True + while changed: + changed = False + for s in self.states: + for rp in s.kernel: + # from (from is a keyword) + f = self.lookaheads[s][rp] + # to + t = self.propagates[s][rp] + for x in t: + old = len(x) + x |= f + changed = changed or (len(x) != old) + + def generate_lalr1_states(self): + # 1 to 1 correspondance between LR0 and LALR1 states + # We must fetch the lookaheads we calculated, + # to create the LALR1 kernels from the LR0 kernels. + # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items. + # map of LR0 states to LALR1 states + m = {} + for s in self.states: + kernel = [] + for rp in s.kernel: + las = self.lookaheads[s][rp] + assert(len(las) > 0) + for la in las: + kernel.append((rp, la)) + m[s] = self.generate_lr1_closure(kernel) + + self.states = {} + for s, v in m.items(): + actions = {} + for la, next_state in s.transitions.items(): + actions[la] = (Shift, next_state.closure) + + sat, _ = classify_bool(v, lambda x: x[0].is_satisfied) + reductions = classify(sat, lambda x: x[1], lambda x: x[0]) + for la, rps in reductions.items(): + if len(rps) > 1: + raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ]))) + if la in actions: + if self.debug: + logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", la.name) + logging.warning(' * %s', str(rps[0])) + else: + actions[la] = (Reduce, rps[0].rule) + + self.states[s.closure] = {k.name: v for k, v in actions.items()} + + end_states = {} + for s in self.states: + for rp in s: + for start in self.lr0_start_states: + if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied: + assert(not start in end_states) + end_states[start] = s + + self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states) if self.debug: self.parse_table = self._parse_table else: self.parse_table = IntParseTable.from_ParseTable(self._parse_table) + def generate_lr1_closure(self, kernel): + closure = set() + + q = list(kernel) + while len(q) > 0: + rp, la = q.pop() + if (rp, la) in closure: + continue + closure.add((rp, la)) + + if rp.is_satisfied: + continue + if rp.next.is_term: + continue + + l = [] + i = rp.index + 1 + n = len(rp.rule.expansion) + while i < n: + s = rp.rule.expansion[i] + l.extend(self.FIRST.get(s, [])) + if not s in self.NULLABLE: + break + i += 1 + + # if all of rp.rule.expansion[rp.index + 1:] were nullable: + if i == n: + l.append(la) + + for r in self.lr0_rules_by_origin[rp.next]: + for s in l: + q.append((RulePtr(r, 0), s)) + + return closure diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 39dd5f3..6eb3839 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -6,7 +6,7 @@ from ..exceptions import UnexpectedToken from ..lexer import 
Token from ..utils import Enumerator, Serialize -from .lalr_analysis import LALR_Analyzer, Shift, IntParseTable +from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable ###{standalone @@ -15,7 +15,10 @@ class LALR_Parser(object): assert all(r.options is None or r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) - analysis.compute_lookahead() + analysis.generate_lr0_states() + analysis.discover_lookaheads() + analysis.propagate_lookaheads() + analysis.generate_lalr1_states() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table @@ -65,6 +68,9 @@ class _Parser: raise UnexpectedToken(token, expected, state=state) def reduce(rule): + if state_stack[-1] == end_state: + return True + size = len(rule.expansion) if size: s = value_stack[-size:] @@ -80,6 +86,8 @@ class _Parser: state_stack.append(new_state) value_stack.append(value) + return False + # Main LALR-parser loop for token in stream: while True: @@ -97,11 +105,8 @@ class _Parser: token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) while True: _action, arg = get_action(token) - if _action is Shift: - assert arg == end_state - val ,= value_stack - return val - else: - reduce(arg) + assert(_action is Reduce) + if reduce(arg): + return value_stack[-1] ###} From 6f412c25b705852958243f3538b329c668ccc6d4 Mon Sep 17 00:00:00 2001 From: Raekye Date: Fri, 2 Aug 2019 09:12:22 -0400 Subject: [PATCH 031/132] LALR optimizations and profiling --- lark/grammar.py | 11 +- lark/parsers/grammar_analysis.py | 121 ++++++++++++++- lark/parsers/lalr_analysis.py | 250 ++++++++++++++++++++++++++----- lark/parsers/lalr_parser.py | 12 ++ 4 files changed, 348 insertions(+), 46 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index 14893fb..f90cce4 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -3,10 +3,13 @@ from .utils import Serialize ###{standalone class Symbol(Serialize): + __slots__ = ('name', '_hash') + is_term = NotImplemented def __init__(self, name): self.name = name + self._hash = hash(self.name) def __eq__(self, other): assert isinstance(other, Symbol), other @@ -16,7 +19,7 @@ class Symbol(Serialize): return not (self == other) def __hash__(self): - return hash(self.name) + return self._hash def __repr__(self): return '%s(%r)' % (type(self).__name__, self.name) @@ -31,6 +34,7 @@ class Terminal(Symbol): def __init__(self, name, filter_out=False): self.name = name + self._hash = hash(self.name) self.filter_out = filter_out @property @@ -69,7 +73,7 @@ class Rule(Serialize): expansion : a list of symbols order : index of this expansion amongst all rules of the same name """ - __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') + __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp') __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' __serialize_namespace__ = Terminal, NonTerminal, RuleOptions @@ -81,6 +85,7 @@ class Rule(Serialize): self.order = order self.options = options self._hash = hash((self.origin, tuple(self.expansion))) + self._rp = None def _deserialize(self): self._hash = hash((self.origin, tuple(self.expansion))) @@ -101,4 +106,4 @@ class Rule(Serialize): -###} \ No newline at end of file +###} diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 4085ea5..71a7bc5 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -1,18 
+1,41 @@ -from collections import Counter +from collections import Counter, defaultdict from ..utils import bfs, fzset, classify from ..exceptions import GrammarError from ..grammar import Rule, Terminal, NonTerminal +import time +t_firsts = 0 +t_xy = 0 +t_call = 0 +cache_hits = 0 +cache_misses = 0 + +# used to be just a tuple (rp, la) +# but by making it an object, +# the hash and equality become trivial +# (slightly faster for sets which are hashtables?) +class RulePtrLookahead(object): + __slots__ = 'rp', 'la' + + def __init__(self, rp, la): + self.rp = rp + self.la = la class RulePtr(object): - __slots__ = ('rule', 'index') + __slots__ = ('rule', 'index', '_advance', '_lookaheads', '_next_rules_by_origin', '_first') def __init__(self, rule, index): assert isinstance(rule, Rule) assert index <= len(rule.expansion) self.rule = rule self.index = index + #self._hash = hash((self.rule, self.index)) + #self._hash = None + self._advance = None + self._lookaheads = {} + self._next_rules_by_origin = None + self._first = None def __repr__(self): before = [x.name for x in self.rule.expansion[:self.index]] @@ -23,32 +46,102 @@ class RulePtr(object): def next(self): return self.rule.expansion[self.index] + # don't create duplicate RulePtrs def advance(self, sym): assert self.next == sym - return RulePtr(self.rule, self.index+1) + a = self._advance + if a is None: + a = RulePtr(self.rule, self.index + 1) + self._advance = a + return a @property def is_satisfied(self): return self.index == len(self.rule.expansion) + def lookahead(self, la): + rp_la = self._lookaheads.get(la, None) + if rp_la is None: + rp_la = RulePtrLookahead(self, la) + self._lookaheads[la] = rp_la + return rp_la + + def next_rules_by_origin(self, rules_by_origin): + n = self._next_rules_by_origin + if n is None: + n = rules_by_origin[self.next] + self._next_rules_by_origin = n + return n + + # recursive form of lalr_analyis.py:343 (which is easier to understand IMO) + # normally avoid recursion but this allows us to cache + # each intermediate step in a corresponding RulePtr + def first(self, i, firsts, nullable, t): + global cache_hits + global cache_misses + global t_firsts + global t_xy + global t_call + t_call += time.time() - t + n = len(self.rule.expansion) + if i == n: + return ([], True) + x = self._first + t_x = time.time() + if x is None: + t0 = time.time() + t_y = time.time() + cache_misses += 1 + s = self.rule.expansion[i] + l = list(firsts.get(s, [])) + b = (s in nullable) + if b: + t1 = time.time() + t_firsts += t1 - t0 + l_b_2 = self.advance(s).first(i + 1, firsts, nullable, time.time()) + #l_b_2 = first(self.advance(self.next), i + 1, firsts, nullable, time.time()) + t0 = time.time() + l.extend(l_b_2[0]) + b = l_b_2[1] + x = (l, b) + self._first = x + t1 = time.time() + t_firsts += t1 - t0 + else: + t_y = time.time() + cache_hits += 1 + t_xy += t_y - t_x + return x + + # optimizations were made so that there should never be + # two distinct equal RulePtrs + # should help set/hashtable lookups? 
+ ''' def __eq__(self, other): return self.rule == other.rule and self.index == other.index def __hash__(self): - return hash((self.rule, self.index)) + return self._hash + ''' + class LR0ItemSet(object): - __slots__ = ('kernel', 'closure', 'transitions') + __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads', '_hash') def __init__(self, kernel, closure): self.kernel = fzset(kernel) self.closure = fzset(closure) self.transitions = {} + self.lookaheads = defaultdict(set) + #self._hash = hash(self.kernel) + # state generation ensures no duplicate LR0ItemSets + ''' def __eq__(self, other): return self.kernel == other.kernel def __hash__(self): - return hash(self.kernel) + return self._hash + ''' def __repr__(self): return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) @@ -153,14 +246,22 @@ class GrammarAnalyzer(object): for start in parser_conf.start} lr0_rules = parser_conf.rules + list(lr0_root_rules.values()) + assert(len(lr0_rules) == len(set(lr0_rules))) self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) - self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) + # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) + for root_rule in lr0_root_rules.values(): + root_rule._rp = RulePtr(root_rule, 0) + self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) for start, root_rule in lr0_root_rules.items()} self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) + # unused, did not help + self.lr1_cache = {} + self.lr1_cache2 = {} + def expand_rule(self, source_rule, rules_by_origin=None): "Returns all init_ptrs accessible by rule (recursive)" @@ -172,7 +273,11 @@ class GrammarAnalyzer(object): assert not rule.is_term, rule for r in rules_by_origin[rule]: - init_ptr = RulePtr(r, 0) + # don't create duplicate RulePtr objects + init_ptr = r._rp + if init_ptr is None: + init_ptr = RulePtr(r, 0) + r._rp = init_ptr init_ptrs.add(init_ptr) if r.expansion: # if not empty rule diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 61fe692..eb87e7a 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -7,13 +7,16 @@ For now, shift/reduce conflicts are automatically resolved as shifts. # Email : erezshin@gmail.com import logging -from collections import defaultdict +from collections import defaultdict, deque from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator from ..exceptions import GrammarError from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet from ..grammar import Rule +from . 
import grammar_analysis + +import time ###{standalone @@ -28,6 +31,16 @@ class Action: Shift = Action('Shift') Reduce = Action('Reduce') +t_set_0 = 0 +t_set_1 = 0 +t_expand = 0 +t_rules = 0 +t_append = 0 +t_z = 0 +t_begin = 0 +t_count = 0 +t_call = 0 + class ParseTable: def __init__(self, states, start_states, end_states): self.states = states @@ -86,20 +99,24 @@ class LALR_Analyzer(GrammarAnalyzer): def generate_lr0_states(self): self.states = set() + # map of kernels to LR0ItemSets + cache = {} def step(state): _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied) d = classify(unsat, lambda rp: rp.next) for sym, rps in d.items(): - kernel = {rp.advance(sym) for rp in rps} - closure = set(kernel) + kernel = fzset({rp.advance(sym) for rp in rps}) + new_state = cache.get(kernel, None) + if new_state is None: + closure = set(kernel) + for rp in kernel: + if not rp.is_satisfied and not rp.next.is_term: + closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin) + new_state = LR0ItemSet(kernel, closure) + cache[kernel] = new_state - for rp in kernel: - if not rp.is_satisfied and not rp.next.is_term: - closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin) - - new_state = LR0ItemSet(kernel, closure) state.transitions[sym] = new_state yield new_state @@ -109,36 +126,59 @@ class LALR_Analyzer(GrammarAnalyzer): pass def discover_lookaheads(self): + # lookaheads is now a member of LR0ItemSet, so don't need to look up a dictionary here # state -> rule -> set of lookaheads - self.lookaheads = defaultdict(lambda: defaultdict(set)) + #self.lookaheads = defaultdict(lambda: defaultdict(set)) # state -> rule -> list of (set of lookaheads) to propagate to - self.propagates = defaultdict(lambda: defaultdict(list)) + #self.propagates = defaultdict(lambda: defaultdict(list)) + self.propagates = {} + + t0 = time.time() + t = Terminal('$END') for s in self.lr0_start_states.values(): for rp in s.kernel: - self.lookaheads[s][rp].add(Terminal('$END')) + #self.lookaheads[s][rp].add(Terminal('$END')) + s.lookaheads[rp].add(t) + + t_closure = 0 # There is a 1 to 1 correspondance between LR0 and LALR1 states. # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items. 
# use a terminal that does not exist in the grammar t = Terminal('$#') for s in self.states: + p = {} + self.propagates[s] = p for rp in s.kernel: - for rp2, la in self.generate_lr1_closure([(rp, t)]): + q = [] + p[rp] = q + t2 = time.time() + z = self.generate_lr1_closure([rp.lookahead(t)], time.time()) + t3 = time.time() + t_closure += t3 - t2 + #for rp2, la in self.generate_lr1_closure([(rp, t)], time.time()): + for rp2_la in z: + rp2 = rp2_la.rp + la = rp2_la.la if rp2.is_satisfied: continue next_symbol = rp2.next next_state = s.transitions[next_symbol] rp3 = rp2.advance(next_symbol) assert(rp3 in next_state.kernel) - x = self.lookaheads[next_state][rp3] + #x = self.lookaheads[next_state][rp3] + x = next_state.lookaheads[rp3] if la == t: # we must propagate rp's lookaheads to rp3's lookahead set - self.propagates[s][rp].append(x) + q.append(x) else: # this lookahead is "generated spontaneously" for rp3 x.add(la) + t1 = time.time() + print('Discovering took {:.3f} (generating closure), {:.3f} (total)'.format(t_closure, t1 - t0)) + def propagate_lookaheads(self): changed = True while changed: @@ -146,7 +186,8 @@ class LALR_Analyzer(GrammarAnalyzer): for s in self.states: for rp in s.kernel: # from (from is a keyword) - f = self.lookaheads[s][rp] + #f = self.lookaheads[s][rp] + f = s.lookaheads[rp] # to t = self.propagates[s][rp] for x in t: @@ -155,20 +196,33 @@ class LALR_Analyzer(GrammarAnalyzer): changed = changed or (len(x) != old) def generate_lalr1_states(self): + t0 = time.time() # 1 to 1 correspondance between LR0 and LALR1 states # We must fetch the lookaheads we calculated, # to create the LALR1 kernels from the LR0 kernels. # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items. # map of LR0 states to LALR1 states m = {} + t_closure = 0 + z = 0 for s in self.states: + z = max(z, len(s.closure)) kernel = [] for rp in s.kernel: - las = self.lookaheads[s][rp] + #las = self.lookaheads[s][rp] + las = s.lookaheads[rp] assert(len(las) > 0) for la in las: - kernel.append((rp, la)) - m[s] = self.generate_lr1_closure(kernel) + kernel.append(rp.lookahead(la)) + t0_0 = time.time() + m[s] = self.generate_lr1_closure(kernel, time.time()) + t0_1 = time.time() + t_closure += t0_1 - t0_0 + + print('Generating lalr1 closure for lalr kernels took {:.3f}'.format(t_closure)) + print('Max lr0 state size was {}'.format(z)) + + t1 = time.time() self.states = {} for s, v in m.items(): @@ -176,8 +230,8 @@ class LALR_Analyzer(GrammarAnalyzer): for la, next_state in s.transitions.items(): actions[la] = (Shift, next_state.closure) - sat, _ = classify_bool(v, lambda x: x[0].is_satisfied) - reductions = classify(sat, lambda x: x[1], lambda x: x[0]) + sat, _ = classify_bool(v, lambda x: x.rp.is_satisfied) + reductions = classify(sat, lambda x: x.la, lambda x: x.rp) for la, rps in reductions.items(): if len(rps) > 1: raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ]))) @@ -190,6 +244,8 @@ class LALR_Analyzer(GrammarAnalyzer): self.states[s.closure] = {k.name: v for k, v in actions.items()} + t2 = time.time() + end_states = {} for s in self.states: for rp in s: @@ -198,44 +254,168 @@ class LALR_Analyzer(GrammarAnalyzer): assert(not start in end_states) end_states[start] = s + t3 = time.time() + self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states) + t4 = time.time() + if self.debug: self.parse_table = self._parse_table else: self.parse_table = 
IntParseTable.from_ParseTable(self._parse_table) - def generate_lr1_closure(self, kernel): + t5 = time.time() + + print(('Generating lalr1 states took ' + ', '.join([ '{:.3f}' ] * 5)).format(t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4)) + print('Generating firsts took {:.3f} (time actually calculating), {:.3f} (end to end), {:.3f} (just function call)'.format(grammar_analysis.t_firsts, grammar_analysis.t_xy, grammar_analysis.t_call)) + + def generate_lr1_closure(self, kernel, t_caller): + global t_call + global t_set_0 + global t_set_1 + global t_expand + global t_rules + global t_append + global t_z + global t_begin + global t_count + + t_start = time.time() + t_call += t_start - t_caller + + # cache the results of this function + # not many hits, no noticeable performance improvement + ''' + k = fzset(kernel) + cached = self.lr1_cache.get(k, None) + if not cached is None: + return cached + ''' + closure = set() + closure_hash = {} + + y = 0 q = list(kernel) while len(q) > 0: - rp, la = q.pop() - if (rp, la) in closure: + t_a = time.time() + rp_la = q.pop() + #rp_la_hash = hash(rp_la) + t0 = time.time() + t_begin += t0 - t_a + # try to manually maintain hashtable, + # as a set of just hashes (ints) was notably faster + ''' + if rp_la_hash in closure_hash: + if rp_la in closure_hash[rp_la_hash]: + t0_0 = time.time() + t_set_0 += t0_0 - t0 + continue + t0_0 = time.time() + t_set_0 += t0_0 - t0 + else: + closure_hash[rp_la_hash] = [] + ''' + if rp_la in closure: + t0_0 = time.time() + t_set_0 += t0_0 - t0 continue - closure.add((rp, la)) + t0_0 = time.time() + closure.add(rp_la) + #closure_hash[rp_la_hash].append(rp_la) + t1 = time.time() + t_set_0 += t0_0 - t0 + t_set_1 += t1 - t0_0 + rp = rp_la.rp + la = rp_la.la if rp.is_satisfied: continue if rp.next.is_term: continue + t2 = time.time() + + # cache these calculations inside each RulePtr + # see grammar_analysis.py:79 l = [] + ''' i = rp.index + 1 n = len(rp.rule.expansion) - while i < n: - s = rp.rule.expansion[i] - l.extend(self.FIRST.get(s, [])) - if not s in self.NULLABLE: - break - i += 1 - + l2_i = self.lr1_cache2.get((rp.rule, i), None) + l2 = [] + if l2_i is None: + while i < n: + s = rp.rule.expansion[i] + l2.extend(self.FIRST.get(s, [])) + if not s in self.NULLABLE: + break + i += 1 + self.lr1_cache2[(rp.rule, i)] = (l2, i) + else: + l2 = l2_i[0] + i = l2_i[1] + + l.extend(l2) + ''' + # this function call seems really slow (see grammar_analysis.t_call above) + # tried making it not a method call so don't need to look up vtable + # still equally slow + l2, nullable = rp.first(rp.index + 1, self.FIRST, self.NULLABLE, time.time()) + #l2, nullable = grammar_analysis.first(rp, rp.index + 1, self.FIRST, self.NULLABLE, time.time()) + #l.extend(l2) + l = l2 + t3 = time.time() + + t_expand += t3 - t2 + + # if we don't modify l2 and add an extra check in the loop below, + # we don't have to copy it # if all of rp.rule.expansion[rp.index + 1:] were nullable: - if i == n: - l.append(la) + #if nullable: + # l.append(la) + + t4 = time.time() + x = rp.next_rules_by_origin(self.lr0_rules_by_origin) + t5 = time.time() - for r in self.lr0_rules_by_origin[rp.next]: + # usually between 20-60? seen as high as ~175 + y = max(y, len(x) * len(l)) + #print('adding {} * {} rules to closure max {}'.format(len(x), len(l), y)) + for r in x: for s in l: - q.append((RulePtr(r, 0), s)) + # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) + # cache r._rp in _rp (1 less object property lookup?) 
+ _rp = r._rp + if _rp is None: + _rp = RulePtr(r, 0) + r._rp = _rp + q.append(_rp.lookahead(s)) + #q.append((r._rp, s)) + if nullable: + _rp = r._rp + if _rp is None: + _rp = RulePtr(r, 0) + r._rp = _rp + q.append(_rp.lookahead(la)) + #q.append((r._rp, la)) + + t6 = time.time() + t_rules += t5 - t4 + t_append += t6 - t5 + + #self.lr1_cache[k] = closure + + t_end = time.time() + t_z += t_end - t_start + + t_count += 1 + + if t_count % 1000 == 0: + print('\tGenerating lr1 closure took begin {:.3f}, set contains {:.3f}, set add {:.3f}, get first {:.3f}'.format(t_begin, t_set_0, t_set_1, t_expand)) + print('\tget next rules {:.3f}, append rules {:.3f}, total {:.3f}, call time {:.3f}, count {}'.format(t_rules, t_append, t_z, t_call, t_count)) + print('\tmax number of appends {}'.format(y)) return closure diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 6eb3839..b3985ae 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -8,6 +8,8 @@ from ..utils import Enumerator, Serialize from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable +import time + ###{standalone class LALR_Parser(object): @@ -15,10 +17,20 @@ class LALR_Parser(object): assert all(r.options is None or r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) + t0 = time.time() analysis.generate_lr0_states() + t1 = time.time() analysis.discover_lookaheads() + t2 = time.time() analysis.propagate_lookaheads() + t3 = time.time() analysis.generate_lalr1_states() + t4 = time.time() + print('Generating lr0 states took {:.3f}'.format(t1 - t0)) + print('Discovering lookaheads took {:.3f}'.format(t2 - t1)) + print('Propagating lookaheads took took {:.3f}'.format(t3 - t2)) + print('Generating lalr states (closure) took {:.3f}'.format(t4 - t3)) + print('-' * 32) callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table From 0c59cba3f5329381fc75a1a37a8426c15165b230 Mon Sep 17 00:00:00 2001 From: Raekye Date: Fri, 9 Aug 2019 03:26:27 -0400 Subject: [PATCH 032/132] implement DeRemer and Pennello's lookahead algorithm for LALR(1) --- lark/grammar.py | 4 +- lark/parsers/grammar_analysis.py | 110 +------- lark/parsers/lalr_analysis.py | 432 +++++++++++-------------------- lark/parsers/lalr_parser.py | 27 +- 4 files changed, 169 insertions(+), 404 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index f90cce4..3480651 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -28,7 +28,7 @@ class Symbol(Serialize): class Terminal(Symbol): - __serialize_fields__ = 'name', 'filter_out' + __serialize_fields__ = 'name', 'filter_out', '_hash' is_term = True @@ -44,7 +44,7 @@ class Terminal(Symbol): class NonTerminal(Symbol): - __serialize_fields__ = 'name', + __serialize_fields__ = 'name', '_hash' is_term = False diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index 71a7bc5..b32f62f 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -5,37 +5,18 @@ from ..exceptions import GrammarError from ..grammar import Rule, Terminal, NonTerminal import time -t_firsts = 0 -t_xy = 0 -t_call = 0 -cache_hits = 0 -cache_misses = 0 - -# used to be just a tuple (rp, la) -# but by making it an object, -# the hash and equality become trivial -# (slightly faster for sets which are hashtables?) 
-class RulePtrLookahead(object): - __slots__ = 'rp', 'la' - - def __init__(self, rp, la): - self.rp = rp - self.la = la +# optimizations were made so that there should never be two distinct equal RulePtrs +# to help with hashtable lookup class RulePtr(object): - __slots__ = ('rule', 'index', '_advance', '_lookaheads', '_next_rules_by_origin', '_first') + __slots__ = ('rule', 'index', '_advance') def __init__(self, rule, index): assert isinstance(rule, Rule) assert index <= len(rule.expansion) self.rule = rule self.index = index - #self._hash = hash((self.rule, self.index)) - #self._hash = None self._advance = None - self._lookaheads = {} - self._next_rules_by_origin = None - self._first = None def __repr__(self): before = [x.name for x in self.rule.expansion[:self.index]] @@ -59,89 +40,16 @@ class RulePtr(object): def is_satisfied(self): return self.index == len(self.rule.expansion) - def lookahead(self, la): - rp_la = self._lookaheads.get(la, None) - if rp_la is None: - rp_la = RulePtrLookahead(self, la) - self._lookaheads[la] = rp_la - return rp_la - - def next_rules_by_origin(self, rules_by_origin): - n = self._next_rules_by_origin - if n is None: - n = rules_by_origin[self.next] - self._next_rules_by_origin = n - return n - - # recursive form of lalr_analyis.py:343 (which is easier to understand IMO) - # normally avoid recursion but this allows us to cache - # each intermediate step in a corresponding RulePtr - def first(self, i, firsts, nullable, t): - global cache_hits - global cache_misses - global t_firsts - global t_xy - global t_call - t_call += time.time() - t - n = len(self.rule.expansion) - if i == n: - return ([], True) - x = self._first - t_x = time.time() - if x is None: - t0 = time.time() - t_y = time.time() - cache_misses += 1 - s = self.rule.expansion[i] - l = list(firsts.get(s, [])) - b = (s in nullable) - if b: - t1 = time.time() - t_firsts += t1 - t0 - l_b_2 = self.advance(s).first(i + 1, firsts, nullable, time.time()) - #l_b_2 = first(self.advance(self.next), i + 1, firsts, nullable, time.time()) - t0 = time.time() - l.extend(l_b_2[0]) - b = l_b_2[1] - x = (l, b) - self._first = x - t1 = time.time() - t_firsts += t1 - t0 - else: - t_y = time.time() - cache_hits += 1 - t_xy += t_y - t_x - return x - - # optimizations were made so that there should never be - # two distinct equal RulePtrs - # should help set/hashtable lookups? 
- ''' - def __eq__(self, other): - return self.rule == other.rule and self.index == other.index - def __hash__(self): - return self._hash - ''' - +# state generation ensures no duplicate LR0ItemSets class LR0ItemSet(object): - __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads', '_hash') + __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads') def __init__(self, kernel, closure): self.kernel = fzset(kernel) self.closure = fzset(closure) self.transitions = {} self.lookaheads = defaultdict(set) - #self._hash = hash(self.kernel) - - # state generation ensures no duplicate LR0ItemSets - ''' - def __eq__(self, other): - return self.kernel == other.kernel - - def __hash__(self): - return self._hash - ''' def __repr__(self): return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure])) @@ -258,9 +166,11 @@ class GrammarAnalyzer(object): self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) - # unused, did not help - self.lr1_cache = {} - self.lr1_cache2 = {} + self.nonterminal_transitions = [] + self.directly_reads = defaultdict(set) + self.reads = defaultdict(set) + self.includes = defaultdict(set) + self.lookback = defaultdict(set) def expand_rule(self, source_rule, rules_by_origin=None): "Returns all init_ptrs accessible by rule (recursive)" diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index eb87e7a..4104713 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -12,9 +12,8 @@ from collections import defaultdict, deque from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator from ..exceptions import GrammarError -from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet +from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet from ..grammar import Rule -from . 
import grammar_analysis import time @@ -31,15 +30,6 @@ class Action: Shift = Action('Shift') Reduce = Action('Reduce') -t_set_0 = 0 -t_set_1 = 0 -t_expand = 0 -t_rules = 0 -t_append = 0 -t_z = 0 -t_begin = 0 -t_count = 0 -t_call = 0 class ParseTable: def __init__(self, states, start_states, end_states): @@ -95,9 +85,60 @@ class IntParseTable(ParseTable): ###} + +# digraph and traverse, see The Theory and Practice of Compiler Writing + +# computes F(x) = G(x) union (union { G(y) | x R y }) +# X: nodes +# R: relation (function mapping node -> list of nodes that satisfy the relation) +# G: set valued function +def digraph(X, R, G): + F = {} + S = [] + N = {} + for x in X: + N[x] = 0 + for x in X: + # this is always true for the first iteration, but N[x] may be updated in traverse below + if N[x] == 0: + traverse(x, S, N, X, R, G, F) + return F + +# x: single node +# S: stack +# N: weights +# X: nodes +# R: relation (see above) +# G: set valued function +# F: set valued function we are computing (map of input -> output) +def traverse(x, S, N, X, R, G, F): + S.append(x) + d = len(S) + N[x] = d + F[x] = G(x) + for y in R(x): + if N[y] == 0: + traverse(y, S, N, X, R, G, F) + n_x = N[x] + assert(n_x > 0) + n_y = N[y] + assert(n_y != 0) + if (n_y > 0) and (n_y < n_x): + N[x] = n_y + F[x].update(F[y]) + if N[x] == d: + f_x = F[x] + while True: + z = S.pop() + N[z] = -1 + F[z] = f_x + if z == x: + break + + class LALR_Analyzer(GrammarAnalyzer): - def generate_lr0_states(self): + def compute_lr0_states(self): self.states = set() # map of kernels to LR0ItemSets cache = {} @@ -125,297 +166,118 @@ class LALR_Analyzer(GrammarAnalyzer): for _ in bfs(self.lr0_start_states.values(), step): pass - def discover_lookaheads(self): - # lookaheads is now a member of LR0ItemSet, so don't need to look up a dictionary here - # state -> rule -> set of lookaheads - #self.lookaheads = defaultdict(lambda: defaultdict(set)) - # state -> rule -> list of (set of lookaheads) to propagate to - #self.propagates = defaultdict(lambda: defaultdict(list)) - self.propagates = {} - - t0 = time.time() - - t = Terminal('$END') - for s in self.lr0_start_states.values(): - for rp in s.kernel: - #self.lookaheads[s][rp].add(Terminal('$END')) - s.lookaheads[rp].add(t) - - t_closure = 0 - - # There is a 1 to 1 correspondance between LR0 and LALR1 states. - # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items. 
- # use a terminal that does not exist in the grammar - t = Terminal('$#') - for s in self.states: - p = {} - self.propagates[s] = p - for rp in s.kernel: - q = [] - p[rp] = q - t2 = time.time() - z = self.generate_lr1_closure([rp.lookahead(t)], time.time()) - t3 = time.time() - t_closure += t3 - t2 - #for rp2, la in self.generate_lr1_closure([(rp, t)], time.time()): - for rp2_la in z: - rp2 = rp2_la.rp - la = rp2_la.la + def compute_reads_relations(self): + # handle start state + for root in self.lr0_start_states.values(): + assert(len(root.kernel) == 1) + for rp in root.kernel: + assert(rp.index == 0) + self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) + + for state in self.states: + seen = set() + for rp in state.closure: + if rp.is_satisfied: + continue + s = rp.next + # if s is a not a nonterminal + if not s in self.lr0_rules_by_origin: + continue + if s in seen: + continue + seen.add(s) + nt = (state, s) + self.nonterminal_transitions.append(nt) + dr = self.directly_reads[nt] + r = self.reads[nt] + next_state = state.transitions[s] + for rp2 in next_state.closure: if rp2.is_satisfied: continue - next_symbol = rp2.next - next_state = s.transitions[next_symbol] - rp3 = rp2.advance(next_symbol) - assert(rp3 in next_state.kernel) - #x = self.lookaheads[next_state][rp3] - x = next_state.lookaheads[rp3] - if la == t: - # we must propagate rp's lookaheads to rp3's lookahead set - q.append(x) + s2 = rp2.next + # if s2 is a terminal + if not s2 in self.lr0_rules_by_origin: + dr.add(s2) + if s2 in self.NULLABLE: + r.add((next_state, s2)) + + def compute_read_sets(self): + R = lambda nt: self.reads[nt] + G = lambda nt: self.directly_reads[nt] + self.read_sets = digraph(self.nonterminal_transitions, R, G) + + def compute_includes_lookback(self): + for nt in self.nonterminal_transitions: + state, nonterminal = nt + includes = [] + lookback = self.lookback[nt] + for rp in state.closure: + if rp.rule.origin != nonterminal: + continue + # traverse the states for rp(.rule) + state2 = state + for i in range(rp.index, len(rp.rule.expansion)): + s = rp.rule.expansion[i] + nt2 = (state2, s) + state2 = state2.transitions[s] + if not nt2 in self.reads: + continue + j = i + 1 + for j in range(i + 1, len(rp.rule.expansion)): + if not rp.rule.expansion[j] in self.NULLABLE: + break else: - # this lookahead is "generated spontaneously" for rp3 - x.add(la) - - t1 = time.time() - print('Discovering took {:.3f} (generating closure), {:.3f} (total)'.format(t_closure, t1 - t0)) - - def propagate_lookaheads(self): - changed = True - while changed: - changed = False - for s in self.states: - for rp in s.kernel: - # from (from is a keyword) - #f = self.lookaheads[s][rp] - f = s.lookaheads[rp] - # to - t = self.propagates[s][rp] - for x in t: - old = len(x) - x |= f - changed = changed or (len(x) != old) - - def generate_lalr1_states(self): - t0 = time.time() - # 1 to 1 correspondance between LR0 and LALR1 states - # We must fetch the lookaheads we calculated, - # to create the LALR1 kernels from the LR0 kernels. - # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items. 
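
A side note on the algorithm introduced in this patch: DeRemer and Pennello's method derives the LALR(1) lookaheads from a few relations over nonterminal transitions (the directly_reads, reads, includes and lookback sets built above), and the digraph()/traverse() helpers added earlier in this patch compute the least sets F with F(x) containing G(x) and F(x) containing F(y) whenever x R y. The sketch below is illustrative only; the toy names nodes, rel, g, f and the brute-force loop are not part of the patch, they just show the fixed point digraph() is expected to reach on a small acyclic relation:

    # Toy relation: a R b and b R c, with seed sets G.
    nodes = ['a', 'b', 'c']
    rel = {'a': ['b'], 'b': ['c'], 'c': []}
    g = {'a': {1}, 'b': {2}, 'c': {3}}

    # Brute-force least fixed point, for comparison with digraph()/traverse().
    f = {x: set(g[x]) for x in nodes}
    changed = True
    while changed:
        changed = False
        for x in nodes:
            for y in rel[x]:
                if not f[y] <= f[x]:   # propagate F(y) into F(x)
                    f[x] |= f[y]
                    changed = True

    # Result: f == {'a': {1, 2, 3}, 'b': {2, 3}, 'c': {3}}
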
- # map of LR0 states to LALR1 states + includes.append(nt2) + # state2 is at the final state for rp.rule + if rp.index == 0: + for rp2 in state2.closure: + if (rp2.rule == rp.rule) and rp2.is_satisfied: + lookback.add((state2, rp2.rule)) + for nt2 in includes: + self.includes[nt2].add(nt) + + def compute_follow_sets(self): + R = lambda nt: self.includes[nt] + G = lambda nt: self.read_sets[nt] + self.follow_sets = digraph(self.nonterminal_transitions, R, G) + + def compute_lookaheads(self): + for nt, lookbacks in self.lookback.items(): + for state, rule in lookbacks: + for s in self.follow_sets[nt]: + state.lookaheads[s].add(rule) + + def compute_lalr1_states(self): m = {} - t_closure = 0 - z = 0 - for s in self.states: - z = max(z, len(s.closure)) - kernel = [] - for rp in s.kernel: - #las = self.lookaheads[s][rp] - las = s.lookaheads[rp] - assert(len(las) > 0) - for la in las: - kernel.append(rp.lookahead(la)) - t0_0 = time.time() - m[s] = self.generate_lr1_closure(kernel, time.time()) - t0_1 = time.time() - t_closure += t0_1 - t0_0 - - print('Generating lalr1 closure for lalr kernels took {:.3f}'.format(t_closure)) - print('Max lr0 state size was {}'.format(z)) - - t1 = time.time() - - self.states = {} - for s, v in m.items(): + for state in self.states: actions = {} - for la, next_state in s.transitions.items(): + for la, next_state in state.transitions.items(): actions[la] = (Shift, next_state.closure) - - sat, _ = classify_bool(v, lambda x: x.rp.is_satisfied) - reductions = classify(sat, lambda x: x.la, lambda x: x.rp) - for la, rps in reductions.items(): - if len(rps) > 1: - raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ]))) + for la, rules in state.lookaheads.items(): + if len(rules) > 1: + raise GrammarError('Collision in %s: %s' % (la, ', '.join([ str(r) for r in rules ]))) if la in actions: if self.debug: - logging.warning("Shift/reduce conflict for terminal %s: (resolving as shift)", la.name) - logging.warning(' * %s', str(rps[0])) + logging.warning('Shift/reduce conflict for terminal %s: (resolving as shift)', la.name) + logging.warning(' * %s', list(rules)[0]) else: - actions[la] = (Reduce, rps[0].rule) + actions[la] = (Reduce, list(rules)[0]) + m[state] = { k.name: v for k, v in actions.items() } - self.states[s.closure] = {k.name: v for k, v in actions.items()} - - t2 = time.time() + self.states = { k.closure: v for k, v in m.items() } + # compute end states end_states = {} - for s in self.states: - for rp in s: + for state in self.states: + for rp in state: for start in self.lr0_start_states: if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied: assert(not start in end_states) - end_states[start] = s - - t3 = time.time() + end_states[start] = state - self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states) - - t4 = time.time() + self._parse_table = ParseTable(self.states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states) if self.debug: self.parse_table = self._parse_table else: self.parse_table = IntParseTable.from_ParseTable(self._parse_table) - - t5 = time.time() - - print(('Generating lalr1 states took ' + ', '.join([ '{:.3f}' ] * 5)).format(t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4)) - print('Generating firsts took {:.3f} (time actually calculating), {:.3f} (end to end), {:.3f} (just function call)'.format(grammar_analysis.t_firsts, grammar_analysis.t_xy, grammar_analysis.t_call)) - - def 
generate_lr1_closure(self, kernel, t_caller): - global t_call - global t_set_0 - global t_set_1 - global t_expand - global t_rules - global t_append - global t_z - global t_begin - global t_count - - t_start = time.time() - t_call += t_start - t_caller - - # cache the results of this function - # not many hits, no noticeable performance improvement - ''' - k = fzset(kernel) - cached = self.lr1_cache.get(k, None) - if not cached is None: - return cached - ''' - - closure = set() - closure_hash = {} - - y = 0 - - q = list(kernel) - while len(q) > 0: - t_a = time.time() - rp_la = q.pop() - #rp_la_hash = hash(rp_la) - t0 = time.time() - t_begin += t0 - t_a - # try to manually maintain hashtable, - # as a set of just hashes (ints) was notably faster - ''' - if rp_la_hash in closure_hash: - if rp_la in closure_hash[rp_la_hash]: - t0_0 = time.time() - t_set_0 += t0_0 - t0 - continue - t0_0 = time.time() - t_set_0 += t0_0 - t0 - else: - closure_hash[rp_la_hash] = [] - ''' - if rp_la in closure: - t0_0 = time.time() - t_set_0 += t0_0 - t0 - continue - t0_0 = time.time() - closure.add(rp_la) - #closure_hash[rp_la_hash].append(rp_la) - t1 = time.time() - t_set_0 += t0_0 - t0 - t_set_1 += t1 - t0_0 - rp = rp_la.rp - la = rp_la.la - - if rp.is_satisfied: - continue - if rp.next.is_term: - continue - - t2 = time.time() - - # cache these calculations inside each RulePtr - # see grammar_analysis.py:79 - l = [] - ''' - i = rp.index + 1 - n = len(rp.rule.expansion) - l2_i = self.lr1_cache2.get((rp.rule, i), None) - l2 = [] - if l2_i is None: - while i < n: - s = rp.rule.expansion[i] - l2.extend(self.FIRST.get(s, [])) - if not s in self.NULLABLE: - break - i += 1 - self.lr1_cache2[(rp.rule, i)] = (l2, i) - else: - l2 = l2_i[0] - i = l2_i[1] - - l.extend(l2) - ''' - # this function call seems really slow (see grammar_analysis.t_call above) - # tried making it not a method call so don't need to look up vtable - # still equally slow - l2, nullable = rp.first(rp.index + 1, self.FIRST, self.NULLABLE, time.time()) - #l2, nullable = grammar_analysis.first(rp, rp.index + 1, self.FIRST, self.NULLABLE, time.time()) - #l.extend(l2) - l = l2 - t3 = time.time() - - t_expand += t3 - t2 - - # if we don't modify l2 and add an extra check in the loop below, - # we don't have to copy it - # if all of rp.rule.expansion[rp.index + 1:] were nullable: - #if nullable: - # l.append(la) - - t4 = time.time() - x = rp.next_rules_by_origin(self.lr0_rules_by_origin) - t5 = time.time() - - # usually between 20-60? seen as high as ~175 - y = max(y, len(x) * len(l)) - #print('adding {} * {} rules to closure max {}'.format(len(x), len(l), y)) - for r in x: - for s in l: - # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) - # cache r._rp in _rp (1 less object property lookup?) 
- _rp = r._rp - if _rp is None: - _rp = RulePtr(r, 0) - r._rp = _rp - q.append(_rp.lookahead(s)) - #q.append((r._rp, s)) - if nullable: - _rp = r._rp - if _rp is None: - _rp = RulePtr(r, 0) - r._rp = _rp - q.append(_rp.lookahead(la)) - #q.append((r._rp, la)) - - t6 = time.time() - t_rules += t5 - t4 - t_append += t6 - t5 - - #self.lr1_cache[k] = closure - - t_end = time.time() - t_z += t_end - t_start - - t_count += 1 - - if t_count % 1000 == 0: - print('\tGenerating lr1 closure took begin {:.3f}, set contains {:.3f}, set add {:.3f}, get first {:.3f}'.format(t_begin, t_set_0, t_set_1, t_expand)) - print('\tget next rules {:.3f}, append rules {:.3f}, total {:.3f}, call time {:.3f}, count {}'.format(t_rules, t_append, t_z, t_call, t_count)) - print('\tmax number of appends {}'.format(y)) - - return closure diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index b3985ae..657e795 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -17,20 +17,13 @@ class LALR_Parser(object): assert all(r.options is None or r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) - t0 = time.time() - analysis.generate_lr0_states() - t1 = time.time() - analysis.discover_lookaheads() - t2 = time.time() - analysis.propagate_lookaheads() - t3 = time.time() - analysis.generate_lalr1_states() - t4 = time.time() - print('Generating lr0 states took {:.3f}'.format(t1 - t0)) - print('Discovering lookaheads took {:.3f}'.format(t2 - t1)) - print('Propagating lookaheads took took {:.3f}'.format(t3 - t2)) - print('Generating lalr states (closure) took {:.3f}'.format(t4 - t3)) - print('-' * 32) + analysis.compute_lr0_states() + analysis.compute_reads_relations() + analysis.compute_read_sets() + analysis.compute_includes_lookback() + analysis.compute_follow_sets() + analysis.compute_lookaheads() + analysis.compute_lalr1_states() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table @@ -80,9 +73,6 @@ class _Parser: raise UnexpectedToken(token, expected, state=state) def reduce(rule): - if state_stack[-1] == end_state: - return True - size = len(rule.expansion) if size: s = value_stack[-size:] @@ -98,6 +88,9 @@ class _Parser: state_stack.append(new_state) value_stack.append(value) + if state_stack[-1] == end_state: + return True + return False # Main LALR-parser loop From de24fa055df13dc1fdb0edd5cd4e8faed6bd2a6a Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 9 Aug 2019 12:41:50 +0200 Subject: [PATCH 033/132] Saving _hash for symbols isn't necessary --- lark/grammar.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index 3480651..d975a19 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -3,13 +3,12 @@ from .utils import Serialize ###{standalone class Symbol(Serialize): - __slots__ = ('name', '_hash') + __slots__ = ('name',) is_term = NotImplemented def __init__(self, name): self.name = name - self._hash = hash(self.name) def __eq__(self, other): assert isinstance(other, Symbol), other @@ -19,7 +18,7 @@ class Symbol(Serialize): return not (self == other) def __hash__(self): - return self._hash + return hash(self.name) def __repr__(self): return '%s(%r)' % (type(self).__name__, self.name) @@ -28,13 +27,12 @@ class Symbol(Serialize): class Terminal(Symbol): - __serialize_fields__ = 'name', 'filter_out', '_hash' + __serialize_fields__ = 'name', 'filter_out' is_term = True def __init__(self, name, 
filter_out=False): self.name = name - self._hash = hash(self.name) self.filter_out = filter_out @property @@ -44,7 +42,7 @@ class Terminal(Symbol): class NonTerminal(Symbol): - __serialize_fields__ = 'name', '_hash' + __serialize_fields__ = 'name', is_term = False From 8466981c084e4fb84dae068dbefbdf77a9273c47 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 9 Aug 2019 13:25:32 +0200 Subject: [PATCH 034/132] Cleanup --- lark/grammar.py | 3 +- lark/parsers/grammar_analysis.py | 34 ++++++---------------- lark/parsers/lalr_analysis.py | 49 ++++++++++++++++++-------------- lark/parsers/lalr_parser.py | 18 ++---------- 4 files changed, 40 insertions(+), 64 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index d975a19..91435b2 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -71,7 +71,7 @@ class Rule(Serialize): expansion : a list of symbols order : index of this expansion amongst all rules of the same name """ - __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp') + __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' __serialize_namespace__ = Terminal, NonTerminal, RuleOptions @@ -83,7 +83,6 @@ class Rule(Serialize): self.order = order self.options = options self._hash = hash((self.origin, tuple(self.expansion))) - self._rp = None def _deserialize(self): self._hash = hash((self.origin, tuple(self.expansion))) diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index b32f62f..94c32cc 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -3,20 +3,16 @@ from collections import Counter, defaultdict from ..utils import bfs, fzset, classify from ..exceptions import GrammarError from ..grammar import Rule, Terminal, NonTerminal -import time -# optimizations were made so that there should never be two distinct equal RulePtrs -# to help with hashtable lookup class RulePtr(object): - __slots__ = ('rule', 'index', '_advance') + __slots__ = ('rule', 'index') def __init__(self, rule, index): assert isinstance(rule, Rule) assert index <= len(rule.expansion) self.rule = rule self.index = index - self._advance = None def __repr__(self): before = [x.name for x in self.rule.expansion[:self.index]] @@ -27,19 +23,19 @@ class RulePtr(object): def next(self): return self.rule.expansion[self.index] - # don't create duplicate RulePtrs def advance(self, sym): assert self.next == sym - a = self._advance - if a is None: - a = RulePtr(self.rule, self.index + 1) - self._advance = a - return a + return RulePtr(self.rule, self.index+1) @property def is_satisfied(self): return self.index == len(self.rule.expansion) + def __eq__(self, other): + return self.rule == other.rule and self.index == other.index + def __hash__(self): + return hash((self.rule, self.index)) + # state generation ensures no duplicate LR0ItemSets class LR0ItemSet(object): @@ -159,19 +155,11 @@ class GrammarAnalyzer(object): self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin) # cache RulePtr(r, 0) in r (no duplicate RulePtr objects) - for root_rule in lr0_root_rules.values(): - root_rule._rp = RulePtr(root_rule, 0) - self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) + self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin)) for start, root_rule in lr0_root_rules.items()} self.FIRST, self.FOLLOW, 
self.NULLABLE = calculate_sets(rules) - self.nonterminal_transitions = [] - self.directly_reads = defaultdict(set) - self.reads = defaultdict(set) - self.includes = defaultdict(set) - self.lookback = defaultdict(set) - def expand_rule(self, source_rule, rules_by_origin=None): "Returns all init_ptrs accessible by rule (recursive)" @@ -183,11 +171,7 @@ class GrammarAnalyzer(object): assert not rule.is_term, rule for r in rules_by_origin[rule]: - # don't create duplicate RulePtr objects - init_ptr = r._rp - if init_ptr is None: - init_ptr = RulePtr(r, 0) - r._rp = init_ptr + init_ptr = RulePtr(r, 0) init_ptrs.add(init_ptr) if r.expansion: # if not empty rule diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 4104713..4af2c24 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -15,8 +15,6 @@ from ..exceptions import GrammarError from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet from ..grammar import Rule -import time - ###{standalone class Action: @@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F): S.append(x) d = len(S) N[x] = d - F[x] = G(x) - for y in R(x): + F[x] = G[x] + for y in R[x]: if N[y] == 0: traverse(y, S, N, X, R, G, F) n_x = N[x] @@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F): class LALR_Analyzer(GrammarAnalyzer): + def __init__(self, parser_conf, debug=False): + GrammarAnalyzer.__init__(self, parser_conf, debug) + self.nonterminal_transitions = [] + self.directly_reads = defaultdict(set) + self.reads = defaultdict(set) + self.includes = defaultdict(set) + self.lookback = defaultdict(set) + def compute_lr0_states(self): - self.states = set() + self.lr0_states = set() # map of kernels to LR0ItemSets cache = {} @@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer): state.transitions[sym] = new_state yield new_state - self.states.add(state) + self.lr0_states.add(state) for _ in bfs(self.lr0_start_states.values(), step): pass @@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer): assert(rp.index == 0) self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ]) - for state in self.states: + for state in self.lr0_states: seen = set() for rp in state.closure: if rp.is_satisfied: continue s = rp.next # if s is a not a nonterminal - if not s in self.lr0_rules_by_origin: + if s not in self.lr0_rules_by_origin: continue if s in seen: continue @@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer): if s2 in self.NULLABLE: r.add((next_state, s2)) - def compute_read_sets(self): - R = lambda nt: self.reads[nt] - G = lambda nt: self.directly_reads[nt] - self.read_sets = digraph(self.nonterminal_transitions, R, G) - def compute_includes_lookback(self): for nt in self.nonterminal_transitions: state, nonterminal = nt @@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer): s = rp.rule.expansion[i] nt2 = (state2, s) state2 = state2.transitions[s] - if not nt2 in self.reads: + if nt2 not in self.reads: continue - j = i + 1 for j in range(i + 1, len(rp.rule.expansion)): if not rp.rule.expansion[j] in self.NULLABLE: break @@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer): for nt2 in includes: self.includes[nt2].add(nt) - def compute_follow_sets(self): - R = lambda nt: self.includes[nt] - G = lambda nt: self.read_sets[nt] - self.follow_sets = digraph(self.nonterminal_transitions, R, G) - def compute_lookaheads(self): + read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads) + follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets) + for nt, 
lookbacks in self.lookback.items(): for state, rule in lookbacks: - for s in self.follow_sets[nt]: + for s in follow_sets[nt]: state.lookaheads[s].add(rule) def compute_lalr1_states(self): m = {} - for state in self.states: + for state in self.lr0_states: actions = {} for la, next_state in state.transitions.items(): actions[la] = (Shift, next_state.closure) @@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer): self.parse_table = self._parse_table else: self.parse_table = IntParseTable.from_ParseTable(self._parse_table) + + def compute_lalr(self): + self.compute_lr0_states() + self.compute_reads_relations() + self.compute_includes_lookback() + self.compute_lookaheads() + self.compute_lalr1_states() \ No newline at end of file diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 657e795..82c8bba 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable -import time - ###{standalone class LALR_Parser(object): @@ -17,13 +15,7 @@ class LALR_Parser(object): assert all(r.options is None or r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) - analysis.compute_lr0_states() - analysis.compute_reads_relations() - analysis.compute_read_sets() - analysis.compute_includes_lookback() - analysis.compute_follow_sets() - analysis.compute_lookaheads() - analysis.compute_lalr1_states() + analysis.compute_lalr() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table @@ -88,11 +80,6 @@ class _Parser: state_stack.append(new_state) value_stack.append(value) - if state_stack[-1] == end_state: - return True - - return False - # Main LALR-parser loop for token in stream: while True: @@ -111,7 +98,8 @@ class _Parser: while True: _action, arg = get_action(token) assert(_action is Reduce) - if reduce(arg): + reduce(arg) + if state_stack[-1] == end_state: return value_stack[-1] ###} From e8c67839c22956586ae6f63e59a727565cd81ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mac=C3=AAdo=20Mendes?= Date: Fri, 9 Aug 2019 18:53:02 -0300 Subject: [PATCH 035/132] Test if lexer correctly detects newlines --- tests/test_parser.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_parser.py b/tests/test_parser.py index 599406f..d0aeb1c 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1558,6 +1558,27 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(parser.parse('xa', 'a'), Tree('a', [])) self.assertEqual(parser.parse('xb', 'b'), Tree('b', [])) + def test_lexer_detect_newline_tokens(self): + # Detect newlines in regular tokens + g = Lark(r"""start: "go" tail* + tail : SA "a" | SB "b" | SC "c" | SD "d" + SA : /\n/ + SB : /./ + SC : /[^a-z]/ + SD : /\s/g + """, parser=PARSER, lexer=LEXER) + _, _, a, _, b, _, c, _, d = g.lex('go\na\nb\nc\nd') + self.assertEqual(a.line, 2) + self.assertEqual(b.line, 3) + self.assertEqual(c.line, 4) + self.assertEqual(d.line, 5) + + # Detect newlines in ignored tokens + for re in ['/\\n/', '/[^a-z]/', '/\\s/']: + g = Lark('start: "a" [start]\n%ignore {}'.format(re), lexer=LEXER, parser=PARSER) + a, b = g.lex('a\na') + self.assertEqual(a.line, 1) + self.assertEqual(b.line, 2) _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() From 1e4dbac58cbec032fd0271b5f1dac26ea2461068 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?F=C3=A1bio=20Mac=C3=AAdo=20Mendes?= Date: Fri, 9 Aug 2019 18:54:44 -0300 Subject: [PATCH 036/132] Fix undetected newlines on ignored tokens --- lark/lexer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index d3e4af6..377fab6 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -270,8 +270,9 @@ def _regexp_has_newline(r): - escaped newline (\\n) - anything but ([^...]) - any-char (.) when the flag (?s) exists + - spaces (\s) """ - return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r) + return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) class Lexer(object): """Lexer interface From 03ae3e1c0f47a399e729b613884bc4463d9bba23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mac=C3=AAdo=20Mendes?= Date: Fri, 9 Aug 2019 18:55:12 -0300 Subject: [PATCH 037/132] Add .idea (Pycharm) to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 293aae0..710a131 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ /lark_parser.egg-info/** tags .vscode +.idea .ropeproject .cache /dist From c5c763580e79f1521d992dcba9e9ba5d9742bc06 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 10 Aug 2019 14:09:37 +0200 Subject: [PATCH 038/132] Fixed test for newline detection --- tests/test_parser.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index d0aeb1c..82da48c 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1560,14 +1560,14 @@ def _make_parser_test(LEXER, PARSER): def test_lexer_detect_newline_tokens(self): # Detect newlines in regular tokens - g = Lark(r"""start: "go" tail* - tail : SA "a" | SB "b" | SC "c" | SD "d" - SA : /\n/ - SB : /./ - SC : /[^a-z]/ - SD : /\s/g - """, parser=PARSER, lexer=LEXER) - _, _, a, _, b, _, c, _, d = g.lex('go\na\nb\nc\nd') + g = _Lark(r"""start: "go" tail* + !tail : SA "@" | SB "@" | SC "@" | SD "@" + SA : "a" /\n/ + SB : /b./s + SC : "c" /[^a-z]/ + SD : "d" /\s/ + """) + a,b,c,d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children] self.assertEqual(a.line, 2) self.assertEqual(b.line, 3) self.assertEqual(c.line, 4) @@ -1575,8 +1575,9 @@ def _make_parser_test(LEXER, PARSER): # Detect newlines in ignored tokens for re in ['/\\n/', '/[^a-z]/', '/\\s/']: - g = Lark('start: "a" [start]\n%ignore {}'.format(re), lexer=LEXER, parser=PARSER) - a, b = g.lex('a\na') + g = _Lark('''!start: "a" "a" + %ignore {}'''.format(re)) + a, b = g.parse('a\na').children self.assertEqual(a.line, 1) self.assertEqual(b.line, 2) From 71c4abfb245c30507cf0dc3ae1d0f62895282121 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 11 Aug 2019 11:26:31 +0200 Subject: [PATCH 039/132] Fixed error message (Issue #380) --- lark/load_grammar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index f6c1d22..8cd5742 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -538,7 +538,7 @@ class Grammar: for dups in duplicates.values(): if len(dups) > 1: if dups[0].expansion: - raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates)) + raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" % ''.join('\n * %s' % i for i in dups)) # Empty rule; assert all other attributes are equal assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups) From 
9ca74d7f67a30bfcd2312a537051489f8b2612eb Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 13 Aug 2019 16:38:02 +0200 Subject: [PATCH 040/132] Added the serialize tool for exporting Lark state & analysis --- lark/lexer.py | 5 ++++- lark/tools/serialize.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 lark/tools/serialize.py diff --git a/lark/lexer.py b/lark/lexer.py index 377fab6..0966a81 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -8,7 +8,6 @@ from .exceptions import UnexpectedCharacters, LexError ###{standalone class Pattern(Serialize): - __serialize_fields__ = 'value', 'flags' def __init__(self, value, flags=()): self.value = value @@ -41,6 +40,8 @@ class Pattern(Serialize): class PatternStr(Pattern): + __serialize_fields__ = 'value', 'flags' + type = "str" def to_regexp(self): @@ -52,6 +53,8 @@ class PatternStr(Pattern): max_width = min_width class PatternRE(Pattern): + __serialize_fields__ = 'value', 'flags', '_width' + type = "re" def to_regexp(self): diff --git a/lark/tools/serialize.py b/lark/tools/serialize.py new file mode 100644 index 0000000..ec4fca5 --- /dev/null +++ b/lark/tools/serialize.py @@ -0,0 +1,40 @@ +import codecs +import sys +import json + +from lark import Lark +from lark.grammar import RuleOptions, Rule +from lark.lexer import TerminalDef + +import argparse + +argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''') + +argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file') +argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)') +argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")') +argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")') + + +def serialize(infile, outfile, lexer, start): + lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual + + data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) + outfile.write('{\n') + outfile.write(' "data": %s,\n' % json.dumps(data)) + outfile.write(' "memo": %s\n' % json.dumps(memo)) + outfile.write('}\n') + + +def main(): + if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv: + print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file") + print("") + argparser.print_help() + else: + args = argparser.parse_args() + + serialize(args.grammar_file, args.out, args.lexer, args.start) + +if __name__ == '__main__': + main() \ No newline at end of file From 3cdee35af57dbd0a3f9773ade8486044ab8720fc Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 14 Aug 2019 11:31:43 +0200 Subject: [PATCH 041/132] Version bump --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index db2ce44..dce9e17 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.2" +__version__ = "0.7.3" From d5036eefddbbeeff43b63d0f2e5f7d212ce96033 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 14 Aug 2019 23:37:20 +0200 Subject: [PATCH 042/132] Serialize tool: Multiple start symbols + bugfix --- 
lark/tools/serialize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lark/tools/serialize.py b/lark/tools/serialize.py index ec4fca5..fb69d35 100644 --- a/lark/tools/serialize.py +++ b/lark/tools/serialize.py @@ -12,7 +12,7 @@ argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #desc argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file') argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)') -argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")') +argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+') argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")') @@ -33,8 +33,7 @@ def main(): argparser.print_help() else: args = argparser.parse_args() - - serialize(args.grammar_file, args.out, args.lexer, args.start) + serialize(args.grammar_file, args.out, args.lexer, args.start) if __name__ == '__main__': main() \ No newline at end of file From 7e8488d1a01bdb1faa5175f6fa40fd3b84b22fce Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 15 Aug 2019 18:06:42 +0200 Subject: [PATCH 043/132] Fixed issue #425, keeping in mind unicode issue #411 --- lark/lexer.py | 2 +- lark/load_grammar.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 0966a81..48d8904 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -101,7 +101,7 @@ class Token(Str): self.type = type_ self.pos_in_stream = pos_in_stream - self.value = Str(value) + self.value = value self.line = line self.column = column self.end_line = end_line diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 8cd5742..12ae38f 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import LALR_TraditionalLexer from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol -from .utils import classify, suppress, dedup_list +from .utils import classify, suppress, dedup_list, Str from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken from .tree import Tree, SlottedTree as ST @@ -451,9 +451,9 @@ class PrepareSymbols(Transformer_InPlace): if isinstance(v, Tree): return v elif v.type == 'RULE': - return NonTerminal(v.value) + return NonTerminal(Str(v.value)) elif v.type == 'TERMINAL': - return Terminal(v.value, filter_out=v.startswith('_')) + return Terminal(Str(v.value), filter_out=v.startswith('_')) assert False def _choice_of_rules(rules): From ad9a9cf37a3f10030d0f93e838e8f76f74b21327 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 17 Aug 2019 22:20:11 +0200 Subject: [PATCH 044/132] Added readthedocs.yml (Issue #426) --- readthedocs.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 readthedocs.yml diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 0000000..9eb8c0d --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,2 @@ +version: 2 +formats: all From 4bf67aa2d616f1f87630833af7cf0d939e638a6b Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 17 Aug 2019 22:25:44 +0200 Subject: [PATCH 045/132] Fix for readthedocs --- readthedocs.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/readthedocs.yml b/readthedocs.yml index 9eb8c0d..f465212 100644 --- 
a/readthedocs.yml +++ b/readthedocs.yml @@ -1,2 +1,5 @@ version: 2 +mkdocs: + configuration: mkdocs.yml + fail_on_warning: false formats: all From 06bc432de3bb9fbcba8102b299ae8804cc69ed70 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 17 Aug 2019 22:32:20 +0200 Subject: [PATCH 046/132] Fix for readthedocs (another attempt) --- readthedocs.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/readthedocs.yml b/readthedocs.yml index f465212..dc59191 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,5 +1,10 @@ version: 2 + mkdocs: configuration: mkdocs.yml fail_on_warning: false + formats: all + +python: + version: 3.7 From 4266db9ca1c3fe59510f3190a3945bf39d35c08c Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 17 Aug 2019 22:41:02 +0200 Subject: [PATCH 047/132] Fix for readthedocs (yet another attempt) --- readthedocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs.yml b/readthedocs.yml index dc59191..080eeeb 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,4 +7,4 @@ mkdocs: formats: all python: - version: 3.7 + version: 3.5 From d845aa3bf1cf6408df27b843c02dd9b4b729c41a Mon Sep 17 00:00:00 2001 From: night199uk Date: Sat, 17 Aug 2019 11:04:11 -0700 Subject: [PATCH 048/132] Add debug flag to Earley and XEarley to allow dumping the SPPF --- lark/parser_frontends.py | 5 ++++- lark/parsers/earley.py | 7 ++++++- lark/parsers/xearley.py | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index c1bb3c9..8423ae4 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -139,7 +139,8 @@ class Earley(WithLexer): self.init_traditional_lexer() resolve_ambiguity = options.ambiguity == 'resolve' - self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity) + debug = options.debug if options else False + self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug) def match(self, term, token): return term.name == token.type @@ -152,10 +153,12 @@ class XEarley(_ParserFrontend): self._prepare_match(lexer_conf) resolve_ambiguity = options.ambiguity == 'resolve' + debug = options.debug if options else False self.parser = xearley.Parser(parser_conf, self.match, ignore=lexer_conf.ignore, resolve_ambiguity=resolve_ambiguity, + debug=debug, **kw ) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 4d6201b..a98be02 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -20,10 +20,11 @@ from .earley_common import Item, TransitiveItem from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor class Parser: - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True): + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): analysis = GrammarAnalyzer(parser_conf) self.parser_conf = parser_conf self.resolve_ambiguity = resolve_ambiguity + self.debug = debug self.FIRST = analysis.FIRST self.NULLABLE = analysis.NULLABLE @@ -296,6 +297,10 @@ class Parser: # symbol should have been completed in the last step of the Earley cycle, and will be in # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] + if self.debug: + from .earley_forest import ForestToPyDotVisitor + debug_walker = ForestToPyDotVisitor() + debug_walker.visit(solutions[0], "sppf.png") if not solutions: expected_tokens = [t.expect for t in to_scan] diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 4ab3ba9..3898d6a 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -24,8 +24,8 @@ from .earley_forest import SymbolNode class Parser(BaseParser): - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False): - BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity) + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False): + BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug) self.ignore = [Terminal(t) for t in ignore] self.complete_lex = complete_lex From dc94ebc42f984ed5b1de6c08eba87c808d790bc7 Mon Sep 17 00:00:00 2001 From: night199uk Date: Sat, 17 Aug 2019 17:53:26 -0700 Subject: [PATCH 049/132] Fix Earley non-determinism Rule.order should be set as the index of each expansion with rules of the same name (e.g: a : b # rule.order 1 | c # rule.order 2). --- lark/load_grammar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 12ae38f..7b3bb3f 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -511,12 +511,12 @@ class Grammar: simplify_rule = SimplifyRule_Visitor() compiled_rules = [] - for i, rule_content in enumerate(rules): + for rule_content in rules: name, tree, options = rule_content simplify_rule.visit(tree) expansions = rule_tree_to_text.transform(tree) - for expansion, alias in expansions: + for i, (expansion, alias) in enumerate(expansions): if alias and name.startswith('_'): raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) From dc3c009dca02052a8a1df700d22413efd9abcf01 Mon Sep 17 00:00:00 2001 From: night199uk Date: Mon, 19 Aug 2019 20:35:27 -0700 Subject: [PATCH 050/132] Tweak the Earley ambiguity algorithm to correctly prefer earlier branches --- lark/parsers/earley_forest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index 89522cd..bbceb42 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -122,7 +122,7 @@ class PackedNode(ForestNode): ambiguously. Hence, we use the sort order to identify the order in which ambiguous children should be considered. 
""" - return self.is_empty, -self.priority, -self.rule.order + return self.is_empty, -self.priority, self.rule.order def __iter__(self): return iter([self.left, self.right]) From 59f3a5707bb486b2127a27e145a6b0058bac89b9 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 20 Aug 2019 11:19:16 +0200 Subject: [PATCH 051/132] Fixed partials (Issue #398) --- lark/utils.py | 2 +- tests/test_trees.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lark/utils.py b/lark/utils.py index d46beec..afcb072 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -160,7 +160,7 @@ def smart_decorator(f, create_decorator): elif isinstance(f, partial): # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 - return create_decorator(f.__func__, True) + return wraps(f.func)(create_decorator(f.func, True)) else: return create_decorator(f.__func__.__call__, True) diff --git a/tests/test_trees.py b/tests/test_trees.py index 38f74d5..b28ace2 100644 --- a/tests/test_trees.py +++ b/tests/test_trees.py @@ -4,6 +4,7 @@ import unittest from unittest import TestCase import copy import pickle +import functools from lark.tree import Tree from lark.visitors import Transformer, Interpreter, visit_children_decor, v_args, Discard @@ -146,6 +147,22 @@ class TestTrees(TestCase): res = T().transform(t) self.assertEqual(res, 2.9) + def test_partial(self): + + tree = Tree("start", [Tree("a", ["test1"]), Tree("b", ["test2"])]) + + def test(t, s): + return s.upper() + + @v_args(inline=True) + class T(Transformer): + a = functools.partial(test) + b = functools.partial(lambda t, s: s + "!") + + res = T().transform(tree) + assert res.children == ["TEST1", "test2!"] + + def test_discard(self): class MyTransformer(Transformer): def a(self, args): From def1d2931c70c096d7941a2b7df9eb0f2814cd7d Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 20 Aug 2019 11:34:48 +0200 Subject: [PATCH 052/132] Fixed partials (Issue #398) --- lark/utils.py | 2 +- tests/test_trees.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lark/utils.py b/lark/utils.py index afcb072..5eb2333 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -160,7 +160,7 @@ def smart_decorator(f, create_decorator): elif isinstance(f, partial): # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 - return wraps(f.func)(create_decorator(f.func, True)) + return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True)) else: return create_decorator(f.__func__.__call__, True) diff --git a/tests/test_trees.py b/tests/test_trees.py index b28ace2..4216bd6 100644 --- a/tests/test_trees.py +++ b/tests/test_trees.py @@ -151,16 +151,16 @@ class TestTrees(TestCase): tree = Tree("start", [Tree("a", ["test1"]), Tree("b", ["test2"])]) - def test(t, s): - return s.upper() + def test(prefix, s, postfix): + return prefix + s.upper() + postfix @v_args(inline=True) class T(Transformer): - a = functools.partial(test) - b = functools.partial(lambda t, s: s + "!") + a = functools.partial(test, "@", postfix="!") + b = functools.partial(lambda s: s + "!") res = T().transform(tree) - assert res.children == ["TEST1", "test2!"] + assert res.children == ["@TEST1!", "test2!"] def test_discard(self): From 464f720385c3e67a0ddfcd61b586cfe3b3a32253 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 21 Aug 2019 00:30:03 +0200 Subject: [PATCH 053/132] Fix links in README (Issue #422) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md 
b/README.md index 975b9a4..b9a1bda 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Lark is great at handling ambiguity. Let's parse the phrase "fruit flies like ba ![fruitflies.png](examples/fruitflies.png) -See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples) +See more [examples here](https://github.com/lark-parser/lark/tree/master/examples) @@ -95,7 +95,7 @@ See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples) - Extensive test suite [![codecov](https://codecov.io/gh/erezsh/lark/branch/master/graph/badge.svg)](https://codecov.io/gh/erezsh/lark) - And much more! -See the full list of [features in the wiki](https://github.com/erezsh/lark/wiki/Features) +See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features/) ### Comparison to other libraries From c00f4448faae5d6f05487f021c8a7ad2055c60c6 Mon Sep 17 00:00:00 2001 From: Michael Heyvaert Date: Wed, 21 Aug 2019 12:14:28 +0200 Subject: [PATCH 054/132] fix custom lexer handling for lalr parser + test --- lark/parser_frontends.py | 2 +- tests/__main__.py | 1 + tests/test_parser.py | 19 +++++++++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 8423ae4..ec82299 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -118,7 +118,7 @@ class LALR_ContextualLexer(LALR_WithLexer): class LALR_CustomLexer(LALR_WithLexer): def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None): - self.lexer = lexer_cls(self.lexer_conf) + self.lexer = lexer_cls(lexer_conf) debug = options.debug if options else False self.parser = LALR_Parser(parser_conf, debug=debug) WithLexer.__init__(self, lexer_conf, parser_conf, options) diff --git a/tests/__main__.py b/tests/__main__.py index 1c8a951..4762773 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -21,6 +21,7 @@ from .test_parser import ( TestCykStandard, TestLalrContextual, TestEarleyDynamic, + TestLalrCustom, # TestFullEarleyStandard, TestFullEarleyDynamic, diff --git a/tests/test_parser.py b/tests/test_parser.py index 82da48c..4db5ce9 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -22,7 +22,7 @@ from lark.exceptions import GrammarError, ParseError, UnexpectedToken, Unexpecte from lark.tree import Tree from lark.visitors import Transformer, Transformer_InPlace, v_args from lark.grammar import Rule -from lark.lexer import TerminalDef +from lark.lexer import TerminalDef, Lexer, TraditionalLexer __path__ = os.path.dirname(__file__) def _read(n, *args): @@ -431,12 +431,22 @@ def _make_full_earley_test(LEXER): _TestFullEarley.__name__ = _NAME globals()[_NAME] = _TestFullEarley +class CustomLexer(Lexer): + """ + Purpose of this custom lexer is to test the integration, + so it uses the traditionalparser as implementation without custom lexing behaviour. 
+ """ + def __init__(self, lexer_conf): + self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks) + def lex(self, *args, **kwargs): + return self.lexer.lex(*args, **kwargs) def _make_parser_test(LEXER, PARSER): + lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER def _Lark(grammar, **kwargs): - return Lark(grammar, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs) + return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs) def _Lark_open(gfilename, **kwargs): - return Lark.open(gfilename, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs) + return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs) class _TestParser(unittest.TestCase): def test_basic1(self): g = _Lark("""start: a+ b a* "b" a* @@ -1532,7 +1542,7 @@ def _make_parser_test(LEXER, PARSER): parser = _Lark(grammar) - @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)") + @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)") def test_serialize(self): grammar = """ start: _ANY b "C" @@ -1594,6 +1604,7 @@ _TO_TEST = [ ('dynamic_complete', 'earley'), ('standard', 'lalr'), ('contextual', 'lalr'), + ('custom', 'lalr'), # (None, 'earley'), ] From b6b95c3ff01896a45b7835a7375203969a8040e3 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 26 Aug 2019 15:45:35 +0200 Subject: [PATCH 056/132] Raw docstring to avoid escape warnings (Issue #438) --- lark/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index 48d8904..9cd7adb 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -268,7 +268,7 @@ def build_mres(terminals, match_whole=False): return _build_mres(terminals, len(terminals), match_whole) def _regexp_has_newline(r): - """Expressions that may indicate newlines in a regexp: + r"""Expressions that may indicate newlines in a regexp: - newlines (\n) - escaped newline (\\n) - anything but ([^...]) From 11cd11394f6dcc88bf80962642932ada2d1e9efb Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 27 Aug 2019 23:53:08 +0200 Subject: [PATCH 057/132] Possibly a fix for issue #441 --- lark/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/utils.py b/lark/utils.py index 5eb2333..9513b8b 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -172,7 +172,7 @@ import sre_parse import sre_constants def get_regexp_width(regexp): try: - return sre_parse.parse(regexp).getwidth() + return [int(x) for x in sre_parse.parse(regexp).getwidth()] except sre_constants.error: raise ValueError(regexp) From a17311785711aceb1bc0211f5631d55f4256a72b Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 28 Aug 2019 23:20:41 +0200 Subject: [PATCH 058/132] Included iter_subtrees and related methods in standalone parser (Issue #440) --- lark/tree.py | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/lark/tree.py b/lark/tree.py index fd0038e..ee8dfb7 100644 --- a/lark/tree.py +++ b/lark/tree.py @@ -56,30 +56,6 @@ class Tree(object): def __hash__(self): return hash((self.data, tuple(self.children))) -###} - - def expand_kids_by_index(self, *indices): - "Expand (inline) children at the given indices" - for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect 
indices - kid = self.children[i] - self.children[i:i+1] = kid.children - - def find_pred(self, pred): - "Find all nodes where pred(tree) == True" - return filter(pred, self.iter_subtrees()) - - def find_data(self, data): - "Find all nodes where tree.data == data" - return self.find_pred(lambda t: t.data == data) - - def scan_values(self, pred): - for c in self.children: - if isinstance(c, Tree): - for t in c.scan_values(pred): - yield t - else: - if pred(c): - yield c def iter_subtrees(self): # TODO: Re-write as a more efficient version @@ -102,6 +78,31 @@ class Tree(object): yield x seen.add(id(x)) + def find_pred(self, pred): + "Find all nodes where pred(tree) == True" + return filter(pred, self.iter_subtrees()) + + def find_data(self, data): + "Find all nodes where tree.data == data" + return self.find_pred(lambda t: t.data == data) + +###} + + def expand_kids_by_index(self, *indices): + "Expand (inline) children at the given indices" + for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices + kid = self.children[i] + self.children[i:i+1] = kid.children + + def scan_values(self, pred): + for c in self.children: + if isinstance(c, Tree): + for t in c.scan_values(pred): + yield t + else: + if pred(c): + yield c + def iter_subtrees_topdown(self): stack = [self] while stack: From 56978206a37476b919980c2ccff6d36141ae4161 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 28 Aug 2019 23:40:20 +0200 Subject: [PATCH 059/132] No longer confusing aliases and rules when importing (Issue #433) --- lark/load_grammar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 7b3bb3f..4ecfd22 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -605,6 +605,7 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): _, tree, _ = imported_rules[symbol] except KeyError: raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) + tree = next(tree.find_data("expansion")) # Skip "alias" or other annotations return tree.scan_values(lambda x: x.type in ('RULE', 'TERMINAL')) def get_namespace_name(name): From f06a83a8a79e0507ae58e7f8c5af8888e1d92da8 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 28 Aug 2019 23:50:36 +0200 Subject: [PATCH 060/132] Better error for literal with bad escaping (Issue #287) --- lark/load_grammar.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 4ecfd22..83ee119 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -351,7 +351,10 @@ def _fix_escaping(s): for n in i: w += n if n == '\\': - n2 = next(i) + try: + n2 = next(i) + except StopIteration: + raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s) if n2 == '\\': w += '\\\\' elif n2 not in 'uxnftr': From 71b17d8e7ce5fe113c3ffa0e9c7d7e3ab298636a Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 5 Sep 2019 17:12:02 +0300 Subject: [PATCH 061/132] Update __init__.py --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index dce9e17..2b75d7a 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.3" +__version__ = "0.7.4" From 571bb400e3ca6ba0d262bdbc42d4c969dea47345 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 5 Sep 2019 17:35:17 +0300 Subject: [PATCH 062/132] 
Bugfix for regression (Issue #445) --- lark/load_grammar.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 83ee119..90911fd 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -608,8 +608,9 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): _, tree, _ = imported_rules[symbol] except KeyError: raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) - tree = next(tree.find_data("expansion")) # Skip "alias" or other annotations - return tree.scan_values(lambda x: x.type in ('RULE', 'TERMINAL')) + + return _find_used_symbols(tree) + def get_namespace_name(name): try: @@ -686,6 +687,11 @@ class PrepareGrammar(Transformer_InPlace): return name +def _find_used_symbols(tree): + assert tree.data == 'expansions' + return {t for x in tree.find_data('expansion') + for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} + class GrammarLoader: def __init__(self): terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] @@ -847,9 +853,7 @@ class GrammarLoader: rule_names.add(name) for name, expansions, _o in rules: - used_symbols = {t for x in expansions.find_data('expansion') - for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} - for sym in used_symbols: + for sym in _find_used_symbols(expansions): if sym.type == 'TERMINAL': if sym not in terminal_names: raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) From 54b18e596158071e481aae6e6d74d2f85b2ee4e1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 6 Sep 2019 08:11:45 +0300 Subject: [PATCH 063/132] Allow transformers to transform tokens (Issue #389) --- lark/visitors.py | 31 ++++++++++++++++++++++++++++++- tests/test_parser.py | 18 ++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/lark/visitors.py b/lark/visitors.py index 4a0f639..7d40e74 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -3,6 +3,7 @@ from functools import wraps from .utils import smart_decorator from .tree import Tree from .exceptions import VisitError, GrammarError +from .lexer import Token ###{standalone from inspect import getmembers, getmro @@ -21,6 +22,10 @@ class Transformer: Can be used to implement map or reduce. 
""" + __visit_tokens__ = False # For backwards compatibility + def __init__(self, visit_tokens=False): + self.__visit_tokens__ = visit_tokens + def _call_userfunc(self, tree, new_children=None): # Assumes tree is already transformed children = new_children if new_children is not None else tree.children @@ -45,10 +50,29 @@ class Transformer: except Exception as e: raise VisitError(tree, e) + def _call_userfunc_token(self, token): + try: + f = getattr(self, token.type) + except AttributeError: + return self.__default_token__(token) + else: + try: + return f(token) + except (GrammarError, Discard): + raise + except Exception as e: + raise VisitError(token, e) + + def _transform_children(self, children): for c in children: try: - yield self._transform_tree(c) if isinstance(c, Tree) else c + if isinstance(c, Tree): + yield self._transform_tree(c) + elif self.__visit_tokens__ and isinstance(c, Token): + yield self._call_userfunc_token(c) + else: + yield c except Discard: pass @@ -66,6 +90,11 @@ class Transformer: "Default operation on tree (for override)" return Tree(data, children, meta) + def __default_token__(self, token): + "Default operation on token (for override)" + return token + + @classmethod def _apply_decorator(cls, decorator, **kwargs): mro = getmro(cls) diff --git a/tests/test_parser.py b/tests/test_parser.py index 4db5ce9..e9d46e5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -94,6 +94,24 @@ class TestParsers(unittest.TestCase): r = g.parse('xx') self.assertEqual( r.children[0].data, "c" ) + def test_visit_tokens(self): + class T(Transformer): + def a(self, children): + return children[0] + "!" + def A(self, tok): + return tok.upper() + + # Test regular + g = Lark("""start: a + a : A + A: "x" + """, parser='lalr') + r = T().transform(g.parse("x")) + self.assertEqual( r.children, ["x!"] ) + r = T(True).transform(g.parse("x")) + self.assertEqual( r.children, ["X!"] ) + + def test_embedded_transformer(self): class T(Transformer): def a(self, children): From deb325acb4a4203056f5b51b8457cb0614c10ce1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 6 Sep 2019 08:16:42 +0300 Subject: [PATCH 064/132] Pydot now not necessary for earley debug, shows warning instead (Issue #443) --- lark/parsers/earley.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index a98be02..ff4e125 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -10,6 +10,7 @@ is better documented here: http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ """ +import logging from collections import deque from ..visitors import Transformer_InPlace, v_args @@ -299,8 +300,13 @@ class Parser: solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] if self.debug: from .earley_forest import ForestToPyDotVisitor - debug_walker = ForestToPyDotVisitor() - debug_walker.visit(solutions[0], "sppf.png") + try: + debug_walker = ForestToPyDotVisitor() + except ImportError: + logging.warning("Cannot find dependency 'pydot', will not generate sppf debug image") + else: + debug_walker.visit(solutions[0], "sppf.png") + if not solutions: expected_tokens = [t.expect for t in to_scan] From f43631949cddf6a50fa1bb5d083335b2c6eefee8 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 6 Sep 2019 08:18:42 +0300 Subject: [PATCH 065/132] Version bump --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py 
b/lark/__init__.py index 2b75d7a..69d9faf 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.4" +__version__ = "0.7.5" From 535aebab3c770d5b3acbe6fa21394c901a1f2345 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 11 Sep 2019 01:05:15 +0300 Subject: [PATCH 066/132] Added to docs (Issue #400) --- docs/grammar.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/docs/grammar.md b/docs/grammar.md index 9343ee4..228c8b7 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -1,5 +1,13 @@ # Grammar Reference +Table of contents: + +1. [Definitions](#defs) +1. [Terminals](#terms) +1. [Rules](#rules) +1. [Directives](#dirs) + + ## Definitions **A grammar** is a list of rules and terminals, that together define a language. @@ -25,6 +33,7 @@ Lark begins the parse with the rule 'start', unless specified otherwise in the o Names of rules are always in lowercase, while names of terminals are always in uppercase. This distinction has practical effects, for the shape of the generated parse-tree, and the automatic construction of the lexer (aka tokenizer, or scanner). + ## Terminals Terminals are used to match text into symbols. They can be defined as a combination of literals and other terminals. @@ -70,6 +79,53 @@ WHITESPACE: (" " | /\t/ )+ SQL_SELECT: "select"i ``` +### Regular expressions & Ambiguity + +Each terminal is eventually compiled to a regular expression. All the operators and references inside it are mapped to their respective expressions. + +For example, in the following grammar, `A1` and `A2`, are equivalent: +```perl +A1: "a" | "b" +A2: /a|b/ +``` + +This means that inside terminals, Lark cannot detect or resolve ambiguity, even when using Earley. + +For example, for this grammar: +```perl +start : (A | B)+ +A : "a" | "ab" +B : "b" +``` +We get this behavior: + +```bash +>>> p.parse("ab") +Tree(start, [Token(A, 'a'), Token(B, 'b')]) +``` + +This is happening because Python's regex engine always returns the first matching option. + +If you find yourself in this situation, the recommended solution is to use rules instead. + +Example: + +```python +>>> p = Lark("""start: (a | b)+ +... !a: "a" | "ab" +... !b: "b" +... """, ambiguity="explicit") +>>> print(p.parse("ab").pretty()) +_ambig + start + a ab + start + a a + b b +``` + + + ## Rules **Syntax:** @@ -114,6 +170,7 @@ Rules can be assigned priority only when using Earley (future versions may suppo Priority can be either positive or negative. In not specified for a terminal, it's assumed to be 1 (i.e. the default). 
+ ## Directives ### %ignore From bb57629418c3711c5d3477c7280882fa8927b70a Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 13 Sep 2019 16:12:22 +0300 Subject: [PATCH 067/132] Added 'edit_terminals' option (Issue #406) --- lark/lark.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lark/lark.py b/lark/lark.py index ae71d56..c27f534 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -69,6 +69,7 @@ class LarkOptions(Serialize): 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, + 'edit_terminals': None, } def __init__(self, options_dict): @@ -205,6 +206,10 @@ class Lark(Serialize): # Compile the EBNF grammar into BNF self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) + if self.options.edit_terminals: + for t in self.terminals: + self.options.edit_terminals(t) + self._terminals_dict = {t.name:t for t in self.terminals} # If the user asked to invert the priorities, negate them all here. From 7a13fb0f5b968046795fa9d221a38c2a34503605 Mon Sep 17 00:00:00 2001 From: Wataru Ashihara Date: Sun, 22 Sep 2019 13:51:14 +0900 Subject: [PATCH 068/132] Fix SyntaxError with Python 3 in JSON tutorial unpacking a tuple argument is invalid in Python 3. >>> def foo(a, (b, c)): File "", line 1 def foo(a, (b, c)): ^ SyntaxError: invalid syntax Fixes #403 --- docs/json_tutorial.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/json_tutorial.md b/docs/json_tutorial.md index ca1db73..9cc87e7 100644 --- a/docs/json_tutorial.md +++ b/docs/json_tutorial.md @@ -230,7 +230,8 @@ from lark import Transformer class MyTransformer(Transformer): def list(self, items): return list(items) - def pair(self, (k,v)): + def pair(self, key_value): + k, v = key_value return k, v def dict(self, items): return dict(items) @@ -251,9 +252,11 @@ Also, our definitions of list and dict are a bit verbose. 
We can do better: from lark import Transformer class TreeToJson(Transformer): - def string(self, (s,)): + def string(self, s): + (s,) = s return s[1:-1] - def number(self, (n,)): + def number(self, n): + (n,) = n return float(n) list = list @@ -315,9 +318,11 @@ json_grammar = r""" """ class TreeToJson(Transformer): - def string(self, (s,)): + def string(self, s): + (s,) = s return s[1:-1] - def number(self, (n,)): + def number(self, n): + (n,) = n return float(n) list = list From d331a8a1b868f73635e78df51b128c90083a413e Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 15 Sep 2019 14:34:53 +0300 Subject: [PATCH 069/132] Version bump (alpha) --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index 69d9faf..903c10b 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.5" +__version__ = "0.7.6a1" From a7e7b568ff5535a3becee9625ba469b5db444979 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 28 Sep 2019 21:42:39 +0300 Subject: [PATCH 070/132] Fixed contextual lexer error that was confusing users (Issue #194) --- lark/lexer.py | 81 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 9cd7adb..9ea224e 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -3,7 +3,7 @@ import re from .utils import Str, classify, get_regexp_width, Py36, Serialize -from .exceptions import UnexpectedCharacters, LexError +from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken ###{standalone @@ -43,7 +43,7 @@ class PatternStr(Pattern): __serialize_fields__ = 'value', 'flags' type = "str" - + def to_regexp(self): return self._get_flags(re.escape(self.value)) @@ -166,37 +166,32 @@ class _Lex: while line_ctr.char_pos < len(stream): lexer = self.lexer - for mre, type_from_index in lexer.mres: - m = mre.match(stream, line_ctr.char_pos) - if not m: - continue - - t = None - value = m.group(0) - type_ = type_from_index[m.lastindex] - if type_ not in ignore_types: - t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - if t.type in lexer.callback: - t = lexer.callback[t.type](t) - if not isinstance(t, Token): - raise ValueError("Callbacks must return a token (returned %r)" % t) - last_token = t - yield t - else: - if type_ in lexer.callback: - t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - lexer.callback[type_](t) - - line_ctr.feed(value, type_ in newline_types) - if t: - t.end_line = line_ctr.line - t.end_column = line_ctr.column - - break - else: + res = lexer.match(stream, line_ctr.char_pos) + if not res: allowed = {v for m, tfi in lexer.mres for v in tfi.values()} raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token]) + value, type_ = res + + t = None + if type_ not in ignore_types: + t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + if t.type in lexer.callback: + t = lexer.callback[t.type](t) + if not isinstance(t, Token): + raise ValueError("Callbacks must return a token (returned %r)" % t) + last_token = t + yield t + else: + if type_ in lexer.callback: + t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + lexer.callback[type_](t) + + line_ctr.feed(value, type_ in 
newline_types) + if t: + t.end_line = line_ctr.line + t.end_column = line_ctr.column + class UnlessCallback: def __init__(self, mres): @@ -330,6 +325,11 @@ class TraditionalLexer(Lexer): self.mres = build_mres(terminals) + def match(self, stream, pos): + for mre, type_from_index in self.mres: + m = mre.match(stream, pos) + if m: + return m.group(0), type_from_index[m.lastindex] def lex(self, stream): return _Lex(self).lex(stream, self.newline_types, self.ignore_types) @@ -367,9 +367,22 @@ class ContextualLexer(Lexer): def lex(self, stream): l = _Lex(self.lexers[self.parser_state], self.parser_state) - for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): - yield x - l.lexer = self.lexers[self.parser_state] - l.state = self.parser_state + try: + for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): + yield x + l.lexer = self.lexers[self.parser_state] + l.state = self.parser_state + except UnexpectedCharacters as e: + # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, + # but not in the current context. + # This tests the input against the global context, to provide a nicer error. + root_match = self.root_lexer.match(stream, e.pos_in_stream) + if not root_match: + raise + + value, type_ = root_match + t = Token(type_, value, e.pos_in_stream, e.line, e.column) + expected = {v for m, tfi in l.lexer.mres for v in tfi.values()} + raise UnexpectedToken(t, expected) ###} From a207963e46a0d71a34980ccf85841e78b3d37c95 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 28 Sep 2019 23:38:02 +0300 Subject: [PATCH 071/132] Improved error reporting (Issue #194) --- lark/lexer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 9ea224e..26213ed 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -168,7 +168,9 @@ class _Lex: lexer = self.lexer res = lexer.match(stream, line_ctr.char_pos) if not res: - allowed = {v for m, tfi in lexer.mres for v in tfi.values()} + allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types + if not allowed: + allowed = {""} raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token]) value, type_ = res @@ -382,7 +384,6 @@ class ContextualLexer(Lexer): value, type_ = root_match t = Token(type_, value, e.pos_in_stream, e.line, e.column) - expected = {v for m, tfi in l.lexer.mres for v in tfi.values()} - raise UnexpectedToken(t, expected) + raise UnexpectedToken(t, e.allowed) ###} From 94da6c52b80444c141af50562b507155ca88526d Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 1 Oct 2019 23:17:21 +0300 Subject: [PATCH 072/132] Refactored the Earley code to make it thread-safe (Issue #454) --- lark/parsers/earley.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index ff4e125..87920c3 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -46,12 +46,8 @@ class Parser: # skip the extra tree walk. We'll also skip this if the user just didn't specify priorities # on any rules. 
if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None: - self.forest_sum_visitor = ForestSumVisitor() + self.forest_sum_visitor = ForestSumVisitor - if resolve_ambiguity: - self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor) - else: - self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor) self.term_matcher = term_matcher @@ -316,7 +312,10 @@ class Parser: assert False, 'Earley should not generate multiple start symbol items!' # Perform our SPPF -> AST conversion using the right ForestVisitor. - return self.forest_tree_visitor.visit(solutions[0]) + forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else ForestToAmbiguousTreeVisitor + forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor()) + + return forest_tree_visitor.visit(solutions[0]) class ApplyCallbacks(Transformer_InPlace): From 404fef87f4f168543a333ca3ecb045f7017ac15a Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 3 Oct 2019 11:29:49 +0300 Subject: [PATCH 073/132] Version bump --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index 903c10b..6d8b325 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.6a1" +__version__ = "0.7.7" From f566a3618b45340a9b5b0591f36796fea415ff46 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 7 Oct 2019 12:14:10 +0300 Subject: [PATCH 074/132] Bugfix: Lark now throws an error for recursive terminals (Issue #264) --- lark/load_grammar.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 90911fd..1b4ab65 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -479,7 +479,7 @@ class Grammar: # =================== # Convert terminal-trees to strings/regexps - transformer = PrepareLiterals() * TerminalTreeToPattern() + for name, (term_tree, priority) in term_defs: if term_tree is None: # Terminal added through %declare continue @@ -487,7 +487,8 @@ class Grammar: if len(expansions) == 1 and not expansions[0].children: raise GrammarError("Terminals cannot be empty (%s)" % name) - terminals = [TerminalDef(name, transformer.transform(term_tree), priority) + transformer = PrepareLiterals() * TerminalTreeToPattern() + terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) for name, (term_tree, priority) in term_defs if term_tree] # ================= @@ -638,11 +639,10 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): def resolve_term_references(term_defs): - # TODO Cycles detection # TODO Solve with transitive closure (maybe) - token_dict = {k:t for k, (t,_p) in term_defs} - assert len(token_dict) == len(term_defs), "Same name defined twice?" + term_dict = {k:t for k, (t,_p) in term_defs} + assert len(term_dict) == len(term_defs), "Same name defined twice?" 
while True: changed = False @@ -655,11 +655,19 @@ def resolve_term_references(term_defs): if item.type == 'RULE': raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name)) if item.type == 'TERMINAL': - exp.children[0] = token_dict[item] + term_value = term_dict[item] + exp.children[0] = term_value changed = True if not changed: break + for name, term in term_dict.items(): + for child in term.children: + ids = [id(x) for x in child.iter_subtrees()] + if id(term) in ids: + raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) + + def options_from_rule(name, *x): if len(x) > 1: priority, expansions = x From 0a4530b9427c35a262c8248424cde2e06be54f09 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 8 Oct 2019 09:16:33 +0300 Subject: [PATCH 075/132] Improved Earley error on EOF (Issue #457) --- lark/exceptions.py | 8 ++++++++ lark/parsers/earley.py | 8 ++++---- lark/parsers/xearley.py | 3 ++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 4207589..28f1b4b 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -13,6 +13,14 @@ class ParseError(LarkError): class LexError(LarkError): pass +class UnexpectedEOF(ParseError): + def __init__(self, expected): + self.expected = expected + + message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected)) + super(UnexpectedEOF, self).__init__(message) + + class UnexpectedInput(LarkError): pos_in_stream = None diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 87920c3..e18d26c 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -14,7 +14,7 @@ import logging from collections import deque from ..visitors import Transformer_InPlace, v_args -from ..exceptions import ParseError, UnexpectedToken +from ..exceptions import UnexpectedEOF, UnexpectedToken from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item, TransitiveItem @@ -270,6 +270,7 @@ class Parser: ## Column is now the final column in the parse. assert i == len(columns)-1 + return to_scan def parse(self, stream, start): assert start, start @@ -288,7 +289,7 @@ class Parser: else: columns[0].add(item) - self._parse(stream, columns, to_scan, start_symbol) + to_scan = self._parse(stream, columns, to_scan, start_symbol) # If the parse was successful, the start # symbol should have been completed in the last step of the Earley cycle, and will be in @@ -306,8 +307,7 @@ class Parser: if not solutions: expected_tokens = [t.expect for t in to_scan] - # raise ParseError('Incomplete parse: Could not find a solution to input') - raise ParseError('Unexpected end of input! Expecting a terminal of: %s' % expected_tokens) + raise UnexpectedEOF(expected_tokens) elif len(solutions) > 1: assert False, 'Earley should not generate multiple start symbol items!' diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 3898d6a..f32d0d1 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -146,4 +146,5 @@ class Parser(BaseParser): self.predict_and_complete(i, to_scan, columns, transitives) ## Column is now the final column in the parse. 
- assert i == len(columns)-1 \ No newline at end of file + assert i == len(columns)-1 + return to_scan \ No newline at end of file From 8685a5afc33781bcc72a14ee8be480eb3d4d73bf Mon Sep 17 00:00:00 2001 From: Francesco Franchina Date: Wed, 16 Oct 2019 13:34:25 +0200 Subject: [PATCH 076/132] Fixed some typos in the docs --- docs/classes.md | 2 +- docs/grammar.md | 2 +- docs/how_to_develop.md | 4 ++-- docs/parsers.md | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/classes.md b/docs/classes.md index 9943fd4..f77d7b8 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -96,7 +96,7 @@ Trees can be hashed and compared. Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. -They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each methods accepts the children as an argument. That can be modified using the `v-args` decorator, which allows to inline the arguments (akin to `*args`), or add the tree `meta` property as an argument. +They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each methods accepts the children as an argument. That can be modified using the `v_args` decorator, which allows to inline the arguments (akin to `*args`), or add the tree `meta` property as an argument. See: https://github.com/lark-parser/lark/blob/master/lark/visitors.py diff --git a/docs/grammar.md b/docs/grammar.md index 228c8b7..8a8913b 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -179,7 +179,7 @@ All occurrences of the terminal will be ignored, and won't be part of the parse. Using the `%ignore` directive results in a cleaner grammar. -It's especially important for the LALR(1) algorithm, because adding whitespace (or comments, or other extranous elements) explicitly in the grammar, harms its predictive abilities, which are based on a lookahead of 1. +It's especially important for the LALR(1) algorithm, because adding whitespace (or comments, or other extraneous elements) explicitly in the grammar, harms its predictive abilities, which are based on a lookahead of 1. **Syntax:** ```html diff --git a/docs/how_to_develop.md b/docs/how_to_develop.md index d69a1e3..b161e0c 100644 --- a/docs/how_to_develop.md +++ b/docs/how_to_develop.md @@ -7,7 +7,7 @@ There are many ways you can help the project: * Write new grammars for Lark's library * Write a blog post introducing Lark to your audience * Port Lark to another language -* Help me with code developemnt +* Help me with code development If you're interested in taking one of these on, let me know and I will provide more details and assist you in the process. @@ -60,4 +60,4 @@ Another way to run the tests is using setup.py: ```bash python setup.py test -``` \ No newline at end of file +``` diff --git a/docs/parsers.md b/docs/parsers.md index fb7c997..c487238 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -5,9 +5,9 @@ Lark implements the following parsing algorithms: Earley, LALR(1), and CYK An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar at O(n^3), and O(n^2) when the grammar is unambiguous. It can parse most LR grammars at O(n). Most programming languages are LR, and can be parsed at a linear time. 
-Lark's Earley implementation runs on top of a skipping chart parser, which allows it to use regular expressions, instead of matching characters one-by-one. This is a huge improvement to Earley that is unique to Lark. This feature is used by default, but can also be requested explicitely using `lexer='dynamic'`. +Lark's Earley implementation runs on top of a skipping chart parser, which allows it to use regular expressions, instead of matching characters one-by-one. This is a huge improvement to Earley that is unique to Lark. This feature is used by default, but can also be requested explicitly using `lexer='dynamic'`. -It's possible to bypass the dynamic lexing, and use the regular Earley parser with a traditional lexer, that tokenizes as an independant first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate with `lexer='standard'` +It's possible to bypass the dynamic lexing, and use the regular Earley parser with a traditional lexer, that tokenizes as an independent first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate with `lexer='standard'` **SPPF & Ambiguity resolution** @@ -21,7 +21,7 @@ Lark provides the following options to combat ambiguity: 1) Lark will choose the best derivation for you (default). Users can choose between different disambiguation strategies, and can prioritize (or demote) individual rules over others, using the rule-priority syntax. -2) Users may choose to recieve the set of all possible parse-trees (using ambiguity='explicit'), and choose the best derivation themselves. While simple and flexible, it comes at the cost of space and performance, and so it isn't recommended for highly ambiguous grammars, or very long inputs. +2) Users may choose to receive the set of all possible parse-trees (using ambiguity='explicit'), and choose the best derivation themselves. While simple and flexible, it comes at the cost of space and performance, and so it isn't recommended for highly ambiguous grammars, or very long inputs. 3) As an advanced feature, users may use specialized visitors to iterate the SPPF themselves. Future versions of Lark intend to improve and simplify this interface. 
From 35e102903402e48d8c0090b915b864615a4a70ce Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 22 Oct 2019 20:38:57 +0300 Subject: [PATCH 077/132] Start parameter now accepts unicode in Python 2 (Issue #459) --- lark/lark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lark.py b/lark/lark.py index c27f534..47c6fba 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -86,7 +86,7 @@ class LarkOptions(Serialize): options[name] = value - if isinstance(options['start'], str): + if isinstance(options['start'], STRING_TYPE): options['start'] = [options['start']] self.__dict__['options'] = options From f3714a572f047c5857a2b3ab8d8a161e142f20bf Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 22 Oct 2019 20:43:30 +0300 Subject: [PATCH 078/132] Now prints a nice warning instead of failing tests when js2py isn't installed --- tests/__main__.py | 2 +- tests/test_nearley/test_nearley.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/__main__.py b/tests/__main__.py index 4762773..901f101 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -10,7 +10,7 @@ from .test_reconstructor import TestReconstructor try: from .test_nearley.test_nearley import TestNearley except ImportError: - pass + logging.warn("Warning: Skipping tests for Nearley (js2py required)") # from .test_selectors import TestSelectors # from .test_grammars import TestPythonG, TestConfigG diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py index 721db1d..647f489 100644 --- a/tests/test_nearley/test_nearley.py +++ b/tests/test_nearley/test_nearley.py @@ -15,9 +15,12 @@ NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') if not os.path.exists(NEARLEY_PATH): - print("Skipping Nearley tests!") + logging.warn("Nearley not installed. Skipping Nearley tests!") raise ImportError("Skipping Nearley tests!") +import js2py # Ensures that js2py exists, to avoid failing tests + + class TestNearley(unittest.TestCase): def test_css(self): fn = os.path.join(NEARLEY_PATH, 'examples/csscolor.ne') From 9f218f85b6786bc28ac08ffbcc3359e1545b394e Mon Sep 17 00:00:00 2001 From: Timo Furrer Date: Fri, 25 Oct 2019 20:20:27 +0200 Subject: [PATCH 079/132] Copy exc state when converting UnexpectedCharacters to UnexpectedToken exc. 
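A rough illustration (the grammar below is made up; only the exception attributes come from the code): with a contextual lexer, a character that is a known terminal but is not legal in the current parser context is re-raised as UnexpectedToken, and the parser state is now carried along:

    from lark import Lark
    from lark.exceptions import UnexpectedToken

    parser = Lark('start: "a" "b"', parser='lalr')  # contextual lexer is the default
    try:
        parser.parse('aa')   # the second 'a' is a valid terminal, but not expected here
    except UnexpectedToken as e:
        print(e.state)       # previously None, since the state was dropped on re-raise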
Fixes #462 --- lark/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lexer.py b/lark/lexer.py index 26213ed..f57ae51 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -384,6 +384,6 @@ class ContextualLexer(Lexer): value, type_ = root_match t = Token(type_, value, e.pos_in_stream, e.line, e.column) - raise UnexpectedToken(t, e.allowed) + raise UnexpectedToken(t, e.allowed, state=e.state) ###} From 17b6d6d3b3a14550722b084391caf9cd12d580e6 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 1 Nov 2019 14:37:16 +0200 Subject: [PATCH 080/132] BUGFIX for declared terminals --- lark/load_grammar.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 1b4ab65..a65ca1e 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -656,16 +656,18 @@ def resolve_term_references(term_defs): raise GrammarError("Rules aren't allowed inside terminals (%s in %s)" % (item, name)) if item.type == 'TERMINAL': term_value = term_dict[item] + assert term_value is not None exp.children[0] = term_value changed = True if not changed: break for name, term in term_dict.items(): - for child in term.children: - ids = [id(x) for x in child.iter_subtrees()] - if id(term) in ids: - raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) + if term: # Not just declared + for child in term.children: + ids = [id(x) for x in child.iter_subtrees()] + if id(term) in ids: + raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) def options_from_rule(name, *x): From f07359c31683805f4004fe2d6f37dec84b7c094f Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 1 Nov 2019 14:39:25 +0200 Subject: [PATCH 081/132] Version bump --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index 6d8b325..ff24424 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.7" +__version__ = "0.7.8" From 1f0b1e7520b7ce00d71d9569f49d8c86b49ccc70 Mon Sep 17 00:00:00 2001 From: Giuliano Oliveira Date: Fri, 1 Nov 2019 18:53:31 -0400 Subject: [PATCH 082/132] added visit_topdown methods to Visitor classes --- lark/visitors.py | 17 +++++++++++++++-- tests/test_trees.py | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/lark/visitors.py b/lark/visitors.py index 7d40e74..c6e4f6b 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -186,6 +186,11 @@ class Visitor(VisitorBase): self._call_userfunc(subtree) return tree + def visit_topdown(self,tree): + for subtree in tree.iter_subtrees_topdown(): + self._call_userfunc(subtree) + return tree + class Visitor_Recursive(VisitorBase): """Bottom-up visitor, recursive @@ -198,8 +203,16 @@ class Visitor_Recursive(VisitorBase): if isinstance(child, Tree): self.visit(child) - f = getattr(self, tree.data, self.__default__) - f(tree) + self._call_userfunc(tree) + return tree + + def visit_topdown(self,tree): + self._call_userfunc(tree) + + for child in tree.children: + if isinstance(child, Tree): + self.visit_topdown(child) + return tree diff --git a/tests/test_trees.py b/tests/test_trees.py index 4216bd6..edd2a8b 100644 --- a/tests/test_trees.py +++ b/tests/test_trees.py @@ -7,7 +7,7 @@ import pickle import functools from lark.tree 
import Tree -from lark.visitors import Transformer, Interpreter, visit_children_decor, v_args, Discard +from lark.visitors import Visitor, Visitor_Recursive, Transformer, Interpreter, visit_children_decor, v_args, Discard class TestTrees(TestCase): @@ -34,6 +34,43 @@ class TestTrees(TestCase): nodes = list(self.tree1.iter_subtrees_topdown()) self.assertEqual(nodes, expected) + def test_visitor(self): + class Visitor1(Visitor): + def __init__(self): + self.nodes=[] + + def __default__(self,tree): + self.nodes.append(tree) + class Visitor1_Recursive(Visitor_Recursive): + def __init__(self): + self.nodes=[] + + def __default__(self,tree): + self.nodes.append(tree) + + visitor1=Visitor1() + visitor1_recursive=Visitor1_Recursive() + + expected_top_down = [Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')]), + Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')] + expected_botton_up= [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z'), + Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')])] + + visitor1.visit(self.tree1) + self.assertEqual(visitor1.nodes,expected_botton_up) + + visitor1_recursive.visit(self.tree1) + self.assertEqual(visitor1_recursive.nodes,expected_botton_up) + + visitor1.nodes=[] + visitor1_recursive.nodes=[] + + visitor1.visit_topdown(self.tree1) + self.assertEqual(visitor1.nodes,expected_top_down) + + visitor1_recursive.visit_topdown(self.tree1) + self.assertEqual(visitor1_recursive.nodes,expected_top_down) + def test_interp(self): t = Tree('a', [Tree('b', []), Tree('c', []), 'd']) From 5b930b5973d5e5226209cdf09b3094b69df4e1b9 Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Sun, 3 Nov 2019 08:03:04 +0800 Subject: [PATCH 083/132] page for transformer_and_visitor --- docs/classes.md | 113 -------------------------------- docs/transfromer_and_vistor.md | 115 +++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 113 deletions(-) create mode 100644 docs/transfromer_and_vistor.md diff --git a/docs/classes.md b/docs/classes.md index f77d7b8..ee6e76f 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -94,119 +94,6 @@ Trees can be hashed and compared. ## Transformers & Visitors -Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. - -They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each methods accepts the children as an argument. That can be modified using the `v_args` decorator, which allows to inline the arguments (akin to `*args`), or add the tree `meta` property as an argument. - -See: https://github.com/lark-parser/lark/blob/master/lark/visitors.py - -### Visitors - -Visitors visit each node of the tree, and run the appropriate method on it according to the node's data. - -They work bottom-up, starting with the leaves and ending at the root of the tree. - -**Example** -```python -class IncreaseAllNumbers(Visitor): - def number(self, tree): - assert tree.data == "number" - tree.children[0] += 1 - -IncreaseAllNumbers().visit(parse_tree) -``` - -There are two classes that implement the visitor interface: - -* Visitor - Visit every node (without recursion) - -* Visitor_Recursive - Visit every node using recursion. Slightly faster. - -### Transformers - -Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. - -They work bottom-up (or: depth-first), starting with the leaves and ending at the root of the tree. 
- -Transformers can be used to implement map & reduce patterns. - -Because nodes are reduced from leaf to root, at any point the callbacks may assume the children have already been transformed (if applicable). - -Transformers can be chained into a new transformer by using multiplication. - -**Example:** -```python -from lark import Tree, Transformer - -class EvalExpressions(Transformer): - def expr(self, args): - return eval(args[0]) - -t = Tree('a', [Tree('expr', ['1+2'])]) -print(EvalExpressions().transform( t )) - -# Prints: Tree(a, [3]) -``` - - -Here are the classes that implement the transformer interface: - -- Transformer - Recursively transforms the tree. This is the one you probably want. -- Transformer_InPlace - Non-recursive. Changes the tree in-place instead of returning new instances -- Transformer_InPlaceRecursive - Recursive. Changes the tree in-place instead of returning new instances - -### v_args - -`v_args` is a decorator. - -By default, callback methods of transformers/visitors accept one argument: a list of the node's children. `v_args` can modify this behavior. - -When used on a transformer/visitor class definition, it applies to all the callback methods inside it. - -`v_args` accepts one of three flags: - -- `inline` - Children are provided as `*args` instead of a list argument (not recommended for very long lists). -- `meta` - Provides two arguments: `children` and `meta` (instead of just the first) -- `tree` - Provides the entire tree as the argument, instead of the children. - -Examples: - -```python -@v_args(inline=True) -class SolveArith(Transformer): - def add(self, left, right): - return left + right - - -class ReverseNotation(Transformer_InPlace): - @v_args(tree=True): - def tree_node(self, tree): - tree.children = tree.children[::-1] -``` - -### Discard - -When raising the `Discard` exception in a transformer callback, that node is discarded and won't appear in the parent. - -## Token - -When using a lexer, the resulting tokens in the trees will be of the Token class, which inherits from Python's string. So, normal string comparisons and operations will work as expected. Tokens also have other useful attributes: - -* `type` - Name of the token (as specified in grammar). -* `pos_in_stream` - the index of the token in the text -* `line` - The line of the token in the text (starting with 1) -* `column` - The column of the token in the text (starting with 1) -* `end_line` - The line where the token ends -* `end_column` - The next column after the end of the token. For example, if the token is a single character with a `column` value of 4, `end_column` will be 5. - - -## UnexpectedInput - -- `UnexpectedInput` - - `UnexpectedToken` - The parser recieved an unexpected token - - `UnexpectedCharacters` - The lexer encountered an unexpected string - -After catching one of these exceptions, you may call the following helper methods to create a nicer error message: ### Methods diff --git a/docs/transfromer_and_vistor.md b/docs/transfromer_and_vistor.md new file mode 100644 index 0000000..8385c93 --- /dev/null +++ b/docs/transfromer_and_vistor.md @@ -0,0 +1,115 @@ +## Transformers & Visitors + +Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. + +They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each methods accepts the children as an argument. 
That can be modified using the `v_args` decorator, which allows to inline the arguments (akin to `*args`), or add the tree `meta` property as an argument. + +See: https://github.com/lark-parser/lark/blob/master/lark/visitors.py + +### Visitors + +Visitors visit each node of the tree, and run the appropriate method on it according to the node's data. + +They work bottom-up, starting with the leaves and ending at the root of the tree. + +**Example** +```python +class IncreaseAllNumbers(Visitor): + def number(self, tree): + assert tree.data == "number" + tree.children[0] += 1 + +IncreaseAllNumbers().visit(parse_tree) +``` + +There are two classes that implement the visitor interface: + +* Visitor - Visit every node (without recursion) + +* Visitor_Recursive - Visit every node using recursion. Slightly faster. + +### Transformers + +Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. + +They work bottom-up (or: depth-first), starting with the leaves and ending at the root of the tree. + +Transformers can be used to implement map & reduce patterns. + +Because nodes are reduced from leaf to root, at any point the callbacks may assume the children have already been transformed (if applicable). + +Transformers can be chained into a new transformer by using multiplication. + +**Example:** +```python +from lark import Tree, Transformer + +class EvalExpressions(Transformer): + def expr(self, args): + return eval(args[0]) + +t = Tree('a', [Tree('expr', ['1+2'])]) +print(EvalExpressions().transform( t )) + +# Prints: Tree(a, [3]) +``` + + +Here are the classes that implement the transformer interface: + +- Transformer - Recursively transforms the tree. This is the one you probably want. +- Transformer_InPlace - Non-recursive. Changes the tree in-place instead of returning new instances +- Transformer_InPlaceRecursive - Recursive. Changes the tree in-place instead of returning new instances + +### v_args + +`v_args` is a decorator. + +By default, callback methods of transformers/visitors accept one argument: a list of the node's children. `v_args` can modify this behavior. + +When used on a transformer/visitor class definition, it applies to all the callback methods inside it. + +`v_args` accepts one of three flags: + +- `inline` - Children are provided as `*args` instead of a list argument (not recommended for very long lists). +- `meta` - Provides two arguments: `children` and `meta` (instead of just the first) +- `tree` - Provides the entire tree as the argument, instead of the children. + +Examples: + +```python +@v_args(inline=True) +class SolveArith(Transformer): + def add(self, left, right): + return left + right + + +class ReverseNotation(Transformer_InPlace): + @v_args(tree=True): + def tree_node(self, tree): + tree.children = tree.children[::-1] +``` + +### Discard + +When raising the `Discard` exception in a transformer callback, that node is discarded and won't appear in the parent. + +## Token + +When using a lexer, the resulting tokens in the trees will be of the Token class, which inherits from Python's string. So, normal string comparisons and operations will work as expected. Tokens also have other useful attributes: + +* `type` - Name of the token (as specified in grammar). 
+* `pos_in_stream` - the index of the token in the text +* `line` - The line of the token in the text (starting with 1) +* `column` - The column of the token in the text (starting with 1) +* `end_line` - The line where the token ends +* `end_column` - The next column after the end of the token. For example, if the token is a single character with a `column` value of 4, `end_column` will be 5. + + +## UnexpectedInput + +- `UnexpectedInput` + - `UnexpectedToken` - The parser recieved an unexpected token + - `UnexpectedCharacters` - The lexer encountered an unexpected string + +After catching one of these exceptions, you may call the following helper methods to create a nicer error message: From 6546ea352a199d16a72abc8aadfb0b78396e0b1f Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Sun, 3 Nov 2019 08:15:31 +0800 Subject: [PATCH 084/132] too much --- docs/classes.md | 21 ++++++++++++++++++++- docs/transfromer_and_vistor.md | 18 ------------------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/classes.md b/docs/classes.md index ee6e76f..b63f8f1 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -92,8 +92,27 @@ Trees can be hashed and compared. ---- -## Transformers & Visitors +[Guide](transfromer_and_vistor.md) +## Token + +When using a lexer, the resulting tokens in the trees will be of the Token class, which inherits from Python's string. So, normal string comparisons and operations will work as expected. Tokens also have other useful attributes: + +* `type` - Name of the token (as specified in grammar). +* `pos_in_stream` - the index of the token in the text +* `line` - The line of the token in the text (starting with 1) +* `column` - The column of the token in the text (starting with 1) +* `end_line` - The line where the token ends +* `end_column` - The next column after the end of the token. For example, if the token is a single character with a `column` value of 4, `end_column` will be 5. + + +## UnexpectedInput + +- `UnexpectedInput` + - `UnexpectedToken` - The parser recieved an unexpected token + - `UnexpectedCharacters` - The lexer encountered an unexpected string + +After catching one of these exceptions, you may call the following helper methods to create a nicer error message: ### Methods diff --git a/docs/transfromer_and_vistor.md b/docs/transfromer_and_vistor.md index 8385c93..c4a24b1 100644 --- a/docs/transfromer_and_vistor.md +++ b/docs/transfromer_and_vistor.md @@ -94,22 +94,4 @@ class ReverseNotation(Transformer_InPlace): When raising the `Discard` exception in a transformer callback, that node is discarded and won't appear in the parent. -## Token -When using a lexer, the resulting tokens in the trees will be of the Token class, which inherits from Python's string. So, normal string comparisons and operations will work as expected. Tokens also have other useful attributes: - -* `type` - Name of the token (as specified in grammar). -* `pos_in_stream` - the index of the token in the text -* `line` - The line of the token in the text (starting with 1) -* `column` - The column of the token in the text (starting with 1) -* `end_line` - The line where the token ends -* `end_column` - The next column after the end of the token. For example, if the token is a single character with a `column` value of 4, `end_column` will be 5. 
- - -## UnexpectedInput - -- `UnexpectedInput` - - `UnexpectedToken` - The parser recieved an unexpected token - - `UnexpectedCharacters` - The lexer encountered an unexpected string - -After catching one of these exceptions, you may call the following helper methods to create a nicer error message: From 10a09ebba86fb17c2c13dc3080d6aa1d9987211c Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Mon, 4 Nov 2019 10:14:36 +0800 Subject: [PATCH 085/132] transform token doc --- docs/transfromer_and_vistor.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/transfromer_and_vistor.md b/docs/transfromer_and_vistor.md index c4a24b1..463c8d1 100644 --- a/docs/transfromer_and_vistor.md +++ b/docs/transfromer_and_vistor.md @@ -40,6 +40,7 @@ Because nodes are reduced from leaf to root, at any point the callbacks may assu Transformers can be chained into a new transformer by using multiplication. + **Example:** ```python from lark import Tree, Transformer @@ -54,6 +55,18 @@ print(EvalExpressions().transform( t )) # Prints: Tree(a, [3]) ``` +By default, transformer works only on rules, `visit_tokens=True` will make transfomer process tokens. This is handy in parsing simple token, such as turn `INT` to `int`, `NUMBER` to `float`. etc. + +```python +class T(Transformer): + INT = int # same with def INT(self, tok): int(tok) + NUMBER = float # same with def INT(self, tok): int(tok) + def NAME(self, name): + return lookup_dict.get(name, name) + + +T(visit_tokens=True).transform(tree) +``` Here are the classes that implement the transformer interface: From ca36404257691d45d41b4585eb50cbf7d25a756d Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 4 Nov 2019 11:37:12 +0200 Subject: [PATCH 086/132] Some improvements to visitor documentation --- docs/classes.md | 24 +++++++-------- ...{transfromer_and_vistor.md => visitors.md} | 29 ++++++++++++------- mkdocs.yml | 1 + 3 files changed, 31 insertions(+), 23 deletions(-) rename docs/{transfromer_and_vistor.md => visitors.md} (78%) diff --git a/docs/classes.md b/docs/classes.md index b63f8f1..1555a1f 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -1,15 +1,13 @@ -# Classes - Reference +# Classes Reference This page details the important classes in Lark. ---- -## Lark +## lark.Lark The Lark class is the main interface for the library. It's mostly a thin wrapper for the many different parsers, and for the tree constructor. -### Methods - #### \_\_init\_\_(self, grammar, **options) The Lark class accepts a grammar string or file object, and keyword options: @@ -50,14 +48,10 @@ If a transformer is supplied to `__init__`, returns whatever is the result of th The main tree class -### Properties - * `data` - The name of the rule or alias * `children` - List of matched sub-rules and terminals * `meta` - Line & Column numbers, if using `propagate_positions` -### Methods - #### \_\_init\_\_(self, data, children) Creates a new tree, and stores "data" and "children" in attributes of the same name. @@ -92,8 +86,6 @@ Trees can be hashed and compared. ---- -[Guide](transfromer_and_vistor.md) - ## Token When using a lexer, the resulting tokens in the trees will be of the Token class, which inherits from Python's string. So, normal string comparisons and operations will work as expected. Tokens also have other useful attributes: @@ -105,17 +97,25 @@ When using a lexer, the resulting tokens in the trees will be of the Token class * `end_line` - The line where the token ends * `end_column` - The next column after the end of the token. 
For example, if the token is a single character with a `column` value of 4, `end_column` will be 5. +## Transformer +## Visitor +## Interpreter + +See the [visitors page](visitors.md) + ## UnexpectedInput +## UnexpectedToken + +## UnexpectedException + - `UnexpectedInput` - `UnexpectedToken` - The parser recieved an unexpected token - `UnexpectedCharacters` - The lexer encountered an unexpected string After catching one of these exceptions, you may call the following helper methods to create a nicer error message: -### Methods - #### get_context(text, span) Returns a pretty string pinpointing the error in the text, with `span` amount of context characters around it. diff --git a/docs/transfromer_and_vistor.md b/docs/visitors.md similarity index 78% rename from docs/transfromer_and_vistor.md rename to docs/visitors.md index 463c8d1..c60c1dc 100644 --- a/docs/transfromer_and_vistor.md +++ b/docs/visitors.md @@ -2,9 +2,9 @@ Transformers & Visitors provide a convenient interface to process the parse-trees that Lark returns. -They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each methods accepts the children as an argument. That can be modified using the `v_args` decorator, which allows to inline the arguments (akin to `*args`), or add the tree `meta` property as an argument. +They are used by inheriting from the correct class (visitor or transformer), and implementing methods corresponding to the rule you wish to process. Each method accepts the children as an argument. That can be modified using the `v_args` decorator, which allows to inline the arguments (akin to `*args`), or add the tree `meta` property as an argument. -See: https://github.com/lark-parser/lark/blob/master/lark/visitors.py +See: visitors.py ### Visitors @@ -40,6 +40,8 @@ Because nodes are reduced from leaf to root, at any point the callbacks may assu Transformers can be chained into a new transformer by using multiplication. +`Transformer` can do anything `Visitor` can do, but because it reconstructs the tree, it is slightly less efficient. + **Example:** ```python @@ -55,24 +57,29 @@ print(EvalExpressions().transform( t )) # Prints: Tree(a, [3]) ``` -By default, transformer works only on rules, `visit_tokens=True` will make transfomer process tokens. This is handy in parsing simple token, such as turn `INT` to `int`, `NUMBER` to `float`. etc. +All these classes implement the transformer interface: + +- Transformer - Recursively transforms the tree. This is the one you probably want. +- Transformer_InPlace - Non-recursive. Changes the tree in-place instead of returning new instances +- Transformer_InPlaceRecursive - Recursive. Changes the tree in-place instead of returning new instances + +### visit_tokens + +By default, transformers only visit rules. `visit_tokens=True` will tell Transformer to visit tokens as well. This is a slightly slower alternative to `lexer_callbacks`, but it's easier to maintain and works for all algorithms (even when there isn't a lexer). + +Example: ```python class T(Transformer): - INT = int # same with def INT(self, tok): int(tok) - NUMBER = float # same with def INT(self, tok): int(tok) + INT = int + NUMBER = float def NAME(self, name): return lookup_dict.get(name, name) - + T(visit_tokens=True).transform(tree) ``` -Here are the classes that implement the transformer interface: - -- Transformer - Recursively transforms the tree. This is the one you probably want. 
-- Transformer_InPlace - Non-recursive. Changes the tree in-place instead of returning new instances -- Transformer_InPlaceRecursive - Recursive. Changes the tree in-place instead of returning new instances ### v_args diff --git a/mkdocs.yml b/mkdocs.yml index 63bdd61..f5b0d1d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,5 +9,6 @@ pages: - How To Develop (Guide): how_to_develop.md - Grammar Reference: grammar.md - Tree Construction Reference: tree_construction.md + - Visitors and Transformers: visitors.md - Classes Reference: classes.md - Recipes: recipes.md From b21e89b7f3b86526177e3891a803d5393adf70fa Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 4 Nov 2019 11:44:13 +0200 Subject: [PATCH 087/132] An addition to the docs --- docs/index.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 8517208..d693cce 100644 --- a/docs/index.md +++ b/docs/index.md @@ -35,8 +35,8 @@ $ pip install lark-parser * [Examples](https://github.com/lark-parser/lark/tree/master/examples) * Tutorials * [How to write a DSL](http://blog.erezsh.com/how-to-write-a-dsl-in-python-with-lark/) - Implements a toy LOGO-like language with an interpreter - * [How to write a JSON parser](json_tutorial.md) - * External + * [How to write a JSON parser](json_tutorial.md) - Teaches you how to use Lark + * Unofficial * [Program Synthesis is Possible](https://www.cs.cornell.edu/~asampson/blog/minisynth.html) - Creates a DSL for Z3 * Guides * [How to use Lark](how_to_use.md) @@ -44,6 +44,7 @@ $ pip install lark-parser * Reference * [Grammar](grammar.md) * [Tree Construction](tree_construction.md) + * [Visitors & Transformers](visitors.md) * [Classes](classes.md) * [Cheatsheet (PDF)](lark_cheatsheet.pdf) * Discussion From 5e37fe458d8518ea9a3d1e9621389419d77459dc Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 4 Nov 2019 11:48:33 +0200 Subject: [PATCH 088/132] Version bump (0.8.0rc1) --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index ff24424..0906eb7 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.7.8" +__version__ = "0.8.0rc1" From e39bfa1b18f328adb40d785049e07e9a3264eae8 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 13 Nov 2019 10:00:11 +0200 Subject: [PATCH 089/132] Bugfix: Some tokens did not recieve and end_line (Issue #472) --- lark/lexer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index f57ae51..806d575 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -175,24 +175,24 @@ class _Lex: value, type_ = res - t = None if type_ not in ignore_types: t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + line_ctr.feed(value, type_ in newline_types) + t.end_line = line_ctr.line + t.end_column = line_ctr.column if t.type in lexer.callback: t = lexer.callback[t.type](t) if not isinstance(t, Token): raise ValueError("Callbacks must return a token (returned %r)" % t) - last_token = t yield t + last_token = t else: if type_ in lexer.callback: - t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - lexer.callback[type_](t) + t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + lexer.callback[type_](t2) + line_ctr.feed(value, type_ in newline_types) + - line_ctr.feed(value, type_ in newline_types) 
- if t: - t.end_line = line_ctr.line - t.end_column = line_ctr.column class UnlessCallback: From 84f08a452f6aded0530948757841e61e2a4a423d Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 16 Nov 2019 08:52:06 +0200 Subject: [PATCH 090/132] propagate_positions & maybe_placeholders are now true by default, updated docs, tests & examples accordingly (Issue #449, #451) --- docs/classes.md | 24 +++++++++++++----------- docs/grammar.md | 4 ++-- examples/custom_lexer.py | 2 +- examples/reconstruct_json.py | 10 ++-------- lark/lark.py | 4 ++-- lark/reconstruct.py | 1 + tests/test_parser.py | 2 +- tests/test_reconstructor.py | 4 ++-- 8 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docs/classes.md b/docs/classes.md index 1555a1f..1d59551 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -12,29 +12,31 @@ The Lark class is the main interface for the library. It's mostly a thin wrapper The Lark class accepts a grammar string or file object, and keyword options: -* start - The symbol in the grammar that begins the parse (Default: `"start"`) +* **start** - A list of the rules in the grammar that begin the parse (Default: `["start"]`) -* parser - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`) +* **parser** - Decides which parser engine to use, "earley", "lalr" or "cyk". (Default: `"earley"`) -* lexer - Overrides default lexer. +* **lexer** - Overrides default lexer, depending on parser. -* transformer - Applies the transformer instead of building a parse tree (only allowed with parser="lalr") +* **transformer** - Applies the provided transformer instead of building a parse tree (only allowed with parser="lalr") -* postlex - Lexer post-processing (Default: None. only works when lexer is "standard" or "contextual") +* **postlex** - Lexer post-processing (Default: `None`. only works when lexer is "standard" or "contextual") -* ambiguity (only relevant for earley and cyk) +* **ambiguity** (only relevant for earley and cyk) * "explicit" - Return all derivations inside an "_ambig" data node. * "resolve" - Let the parser choose the best derivation (greedy for tokens, non-greedy for rules. Default) -* debug - Display warnings (such as Shift-Reduce warnings for LALR) +* **debug** - Display warnings (such as Shift-Reduce warnings for LALR) -* keep_all_tokens - Don't throw away any terminals from the tree (Default=False) +* **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`) -* propagate_positions - Propagate line/column count to tree nodes (default=False) +* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`) -* lexer_callbacks - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. +* **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`) + +* **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. 
#### parse(self, text) @@ -50,7 +52,7 @@ The main tree class * `data` - The name of the rule or alias * `children` - List of matched sub-rules and terminals -* `meta` - Line & Column numbers, if using `propagate_positions` +* `meta` - Line & Column numbers (unless `propagate_positions` is disabled) #### \_\_init\_\_(self, data, children) diff --git a/docs/grammar.md b/docs/grammar.md index 8a8913b..cc518e9 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -147,7 +147,7 @@ Each item is one of: * `TERMINAL` * `"string literal"` or `/regexp literal/` * `(item item ..)` - Group items -* `[item item ..]` - Maybe. Same as `(item item ..)?` +* `[item item ..]` - Maybe. Same as `(item item ..)?`, but generates `None` if there is no match * `item?` - Zero or one instances of item ("maybe") * `item*` - Zero or more instances of item * `item+` - One or more instances of item @@ -157,7 +157,7 @@ Each item is one of: **Examples:** ```perl hello_world: "hello" "world" -mul: [mul "*"] number //# Left-recursion is allowed! +mul: (mul "*")? number //# Left-recursion is allowed and encouraged! expr: expr operator expr | value //# Multi-line, belongs to expr diff --git a/examples/custom_lexer.py b/examples/custom_lexer.py index 786bf4f..732e614 100644 --- a/examples/custom_lexer.py +++ b/examples/custom_lexer.py @@ -29,7 +29,7 @@ parser = Lark(""" data_item: STR INT* %declare STR INT - """, parser='lalr', lexer=TypeLexer) + """, parser='lalr', lexer=TypeLexer, propagate_positions=False) class ParseToDict(Transformer): diff --git a/examples/reconstruct_json.py b/examples/reconstruct_json.py index 07df86c..59c58b0 100644 --- a/examples/reconstruct_json.py +++ b/examples/reconstruct_json.py @@ -25,15 +25,9 @@ test_json = ''' def test_earley(): - json_parser = Lark(json_grammar) + json_parser = Lark(json_grammar, maybe_placeholders=False) tree = json_parser.parse(test_json) - # print ('@@', tree.pretty()) - # for x in tree.find_data('true'): - # x.data = 'false' - # # x.children[0].value = '"HAHA"' - - new_json = Reconstructor(json_parser).reconstruct(tree) print (new_json) print (json.loads(new_json) == json.loads(test_json)) @@ -41,7 +35,7 @@ def test_earley(): def test_lalr(): - json_parser = Lark(json_grammar, parser='lalr') + json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree) diff --git a/lark/lark.py b/lark/lark.py index 47c6fba..db1dfd2 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -66,9 +66,9 @@ class LarkOptions(Serialize): 'profile': False, 'priority': 'auto', 'ambiguity': 'auto', - 'propagate_positions': False, + 'propagate_positions': True, 'lexer_callbacks': {}, - 'maybe_placeholders': False, + 'maybe_placeholders': True, 'edit_terminals': None, } diff --git a/lark/reconstruct.py b/lark/reconstruct.py index c446913..fb47b93 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -69,6 +69,7 @@ class MakeMatchTree: class Reconstructor: def __init__(self, parser): # XXX TODO calling compile twice returns different results! 
+ assert parser.options.maybe_placeholders == False tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) diff --git a/tests/test_parser.py b/tests/test_parser.py index e9d46e5..35b3015 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -963,7 +963,7 @@ def _make_parser_test(LEXER, PARSER): @unittest.skipIf(PARSER == 'cyk', "No empty rules") def test_twice_empty(self): - g = """!start: [["A"]] + g = """!start: ("A"?)? """ l = _Lark(g) tree = l.parse('A') diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index 526d2e2..ecab499 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -16,7 +16,7 @@ def _remove_ws(s): class TestReconstructor(TestCase): def assert_reconstruct(self, grammar, code): - parser = Lark(grammar, parser='lalr') + parser = Lark(grammar, parser='lalr', maybe_placeholders=False) tree = parser.parse(code) new = Reconstructor(parser).reconstruct(tree) self.assertEqual(_remove_ws(code), _remove_ws(new)) @@ -105,7 +105,7 @@ class TestReconstructor(TestCase): %ignore WS """ - json_parser = Lark(json_grammar, parser='lalr') + json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) tree = json_parser.parse(test_json) new_json = Reconstructor(json_parser).reconstruct(tree) From 175c4038305048493618ea64b1d065144624459b Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 17 Nov 2019 15:24:27 +0200 Subject: [PATCH 091/132] Transformers now visit tokens by default --- lark/visitors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lark/visitors.py b/lark/visitors.py index c6e4f6b..4f32091 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -22,8 +22,8 @@ class Transformer: Can be used to implement map or reduce. 
""" - __visit_tokens__ = False # For backwards compatibility - def __init__(self, visit_tokens=False): + __visit_tokens__ = True # For backwards compatibility + def __init__(self, visit_tokens=True): self.__visit_tokens__ = visit_tokens def _call_userfunc(self, tree, new_children=None): @@ -189,7 +189,7 @@ class Visitor(VisitorBase): def visit_topdown(self,tree): for subtree in tree.iter_subtrees_topdown(): self._call_userfunc(subtree) - return tree + return tree class Visitor_Recursive(VisitorBase): """Bottom-up visitor, recursive @@ -212,7 +212,7 @@ class Visitor_Recursive(VisitorBase): for child in tree.children: if isinstance(child, Tree): self.visit_topdown(child) - + return tree From 1815bd7fbda932933fed604579e955a8c4d7c021 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 17 Nov 2019 16:01:34 +0200 Subject: [PATCH 092/132] Support for token visitation in internal transformers, as an alternative mechanism for lexer_callbacks --- lark/exceptions.py | 6 +++--- lark/lark.py | 11 ++++++++++- lark/lexer.py | 7 +++++++ lark/visitors.py | 4 ++-- tests/test_parser.py | 20 +++++++++++++------- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 28f1b4b..f46fa82 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -97,10 +97,10 @@ class UnexpectedToken(ParseError, UnexpectedInput): super(UnexpectedToken, self).__init__(message) class VisitError(LarkError): - def __init__(self, tree, orig_exc): - self.tree = tree + def __init__(self, rule, obj, orig_exc): + self.obj = obj self.orig_exc = orig_exc - message = 'Error trying to process rule "%s":\n\n%s' % (tree.data, orig_exc) + message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) super(VisitError, self).__init__(message) ###} diff --git a/lark/lark.py b/lark/lark.py index db1dfd2..d334cc7 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -225,7 +225,16 @@ class Lark(Serialize): for rule in self.rules: if rule.options and rule.options.priority is not None: rule.options.priority = None - self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks) + + # TODO Deprecate lexer_callbacks? 
+ lexer_callbacks = dict(self.options.lexer_callbacks) + if self.options.transformer: + t = self.options.transformer + for term in self.terminals: + if hasattr(t, term.name): + lexer_callbacks[term.name] = getattr(t, term.name) + + self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks) if self.options.parser: self.parser = self._build_parser() diff --git a/lark/lexer.py b/lark/lexer.py index 806d575..21951e4 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -108,6 +108,13 @@ class Token(Str): self.end_column = end_column return self + def update(self, type_=None, value=None): + return Token.new_borrow_pos( + type_ if type_ is not None else self.type, + value if value is not None else self.value, + self + ) + @classmethod def new_borrow_pos(cls, type_, value, borrow_t): return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column) diff --git a/lark/visitors.py b/lark/visitors.py index 4f32091..a2d5e86 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -48,7 +48,7 @@ class Transformer: except (GrammarError, Discard): raise except Exception as e: - raise VisitError(tree, e) + raise VisitError(tree.data, tree, e) def _call_userfunc_token(self, token): try: @@ -61,7 +61,7 @@ class Transformer: except (GrammarError, Discard): raise except Exception as e: - raise VisitError(token, e) + raise VisitError(token.type, token, e) def _transform_children(self, children): diff --git a/tests/test_parser.py b/tests/test_parser.py index 35b3015..caee80e 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -99,16 +99,22 @@ class TestParsers(unittest.TestCase): def a(self, children): return children[0] + "!" def A(self, tok): - return tok.upper() + return tok.update(value=tok.upper()) # Test regular - g = Lark("""start: a - a : A - A: "x" - """, parser='lalr') - r = T().transform(g.parse("x")) + g = """start: a + a : A + A: "x" + """ + p = Lark(g, parser='lalr') + r = T(False).transform(p.parse("x")) self.assertEqual( r.children, ["x!"] ) - r = T(True).transform(g.parse("x")) + r = T().transform(p.parse("x")) + self.assertEqual( r.children, ["X!"] ) + + # Test internal transformer + p = Lark(g, parser='lalr', transformer=T()) + r = p.parse("x") self.assertEqual( r.children, ["X!"] ) From 58d6d9fac1883476ea890634124fbfbabc952650 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 17 Nov 2019 16:10:54 +0200 Subject: [PATCH 093/132] Added Token.end_pos, and updated docs regarding recent commits --- docs/recipes.md | 12 ++++++------ examples/custom_lexer.py | 2 +- lark/lexer.py | 8 +++++--- lark/parse_tree_builder.py | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/recipes.md b/docs/recipes.md index 2202ab7..4e734e7 100644 --- a/docs/recipes.md +++ b/docs/recipes.md @@ -19,18 +19,18 @@ It only works with the standard and contextual lexers. ### Example 1: Replace string values with ints for INT tokens ```python -from lark import Lark, Token +from lark import Lark, Transformer -def tok_to_int(tok): - "Convert the value of `tok` from string to int, while maintaining line number & column." - # tok.type == 'INT' - return Token.new_borrow_pos(tok.type, int(tok), tok) +class T(Transformer): + def INT(self, tok): + "Convert the value of `tok` from string to int, while maintaining line number & column." 
+ return tok.update(value=int(tok)) parser = Lark(""" start: INT* %import common.INT %ignore " " -""", parser="lalr", lexer_callbacks = {'INT': tok_to_int}) +""", parser="lalr", transformer=T()) print(parser.parse('3 14 159')) ``` diff --git a/examples/custom_lexer.py b/examples/custom_lexer.py index 732e614..786bf4f 100644 --- a/examples/custom_lexer.py +++ b/examples/custom_lexer.py @@ -29,7 +29,7 @@ parser = Lark(""" data_item: STR INT* %declare STR INT - """, parser='lalr', lexer=TypeLexer, propagate_positions=False) + """, parser='lalr', lexer=TypeLexer) class ParseToDict(Transformer): diff --git a/lark/lexer.py b/lark/lexer.py index 21951e4..871b25e 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -90,9 +90,9 @@ class TerminalDef(Serialize): class Token(Str): - __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') + __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') - def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None): + def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): try: self = super(Token, cls).__new__(cls, value) except UnicodeDecodeError: @@ -106,6 +106,7 @@ class Token(Str): self.column = column self.end_line = end_line self.end_column = end_column + self.end_pos = end_pos return self def update(self, type_=None, value=None): @@ -117,7 +118,7 @@ class Token(Str): @classmethod def new_borrow_pos(cls, type_, value, borrow_t): - return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column) + return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) @@ -187,6 +188,7 @@ class _Lex: line_ctr.feed(value, type_ in newline_types) t.end_line = line_ctr.line t.end_column = line_ctr.column + t.end_pos = line_ctr.char_pos if t.type in lexer.callback: t = lexer.callback[t.type](t) if not isinstance(t, Token): diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index b54b6e8..3c47ef0 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -51,7 +51,7 @@ class PropagatePositions: elif isinstance(c, Token): res.meta.end_line = c.end_line res.meta.end_column = c.end_column - res.meta.end_pos = c.pos_in_stream + len(c.value) + res.meta.end_pos = c.end_pos res.meta.empty = False break From 54027942515054682a2958d7a7570a162311c177 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 17 Nov 2019 16:35:53 +0200 Subject: [PATCH 094/132] Fix for nearley --- lark/tools/nearley.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index 8412259..0b04fb5 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -18,7 +18,7 @@ nearley_grammar = r""" expansion: expr+ js - ?expr: item [":" /[+*?]/] + ?expr: item (":" /[+*?]/)? 
?item: rule|string|regexp|null | "(" expansions ")" @@ -167,7 +167,7 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path): emit(" __default__ = lambda self, n, c, m: c if c else None") emit() - emit('parser = Lark(grammar, start="n_%s")' % start) + emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start) emit('def parse(text):') emit(' return TransformNearley().transform(parser.parse(text))') From 9727eb02264331b3771084ab58778dfbd7fff756 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sun, 17 Nov 2019 20:12:44 +0200 Subject: [PATCH 095/132] Added info to the docs about maybe_placeholders (Issue #451) --- docs/tree_construction.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/tree_construction.md b/docs/tree_construction.md index 6b581e0..9e61d4d 100644 --- a/docs/tree_construction.md +++ b/docs/tree_construction.md @@ -7,6 +7,10 @@ For example, the rule `node: child1 child2` will create a tree node with two chi Using `item+` or `item*` will result in a list of items, equivalent to writing `item item item ..`. +Using `item?` will return the item if it matched, or nothing. + +Using `[item]` will return the item if it matched, or the value `None`, if it didn't. It's possible to force `[]` to behave like `()?`, by using the `maybe_placeholders=False` option when initializing Lark. + ### Terminals Terminals are always values in the tree, never branches. From ed3c131ca8b58aa976ec9e2401493acdd3c30b85 Mon Sep 17 00:00:00 2001 From: Mike Roberts Date: Mon, 18 Nov 2019 09:43:50 +0000 Subject: [PATCH 096/132] Allow comments in rule definitions --- examples/lark.lark | 2 +- lark/load_grammar.py | 2 +- tests/test_parser.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/lark.lark b/examples/lark.lark index 915cf2e..7373c54 100644 --- a/examples/lark.lark +++ b/examples/lark.lark @@ -44,7 +44,7 @@ _NL: /(\r?\n)+\s*/ %import common.INT -> NUMBER %import common.WS_INLINE -COMMENT: "//" /[^\n]/* +COMMENT: /\s*/ "//" /[^\n]/* %ignore WS_INLINE %ignore COMMENT diff --git a/lark/load_grammar.py b/lark/load_grammar.py index a65ca1e..1070f86 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -85,7 +85,7 @@ TERMINALS = { 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS, '_NL': r'(\r?\n)+\s*', 'WS': r'[ \t]+', - 'COMMENT': r'//[^\n]*', + 'COMMENT': r'\s*//[^\n]*', '_TO': '->', '_IGNORE': r'%ignore', '_DECLARE': r'%declare', diff --git a/tests/test_parser.py b/tests/test_parser.py index caee80e..8cefcb8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -94,6 +94,16 @@ class TestParsers(unittest.TestCase): r = g.parse('xx') self.assertEqual( r.children[0].data, "c" ) + def test_comment_in_rule_definition(self): + g = Lark("""start: a + a: "a" + // A comment + // Another + | "b" + """) + r = g.parse('b') + self.assertEqual( r.children[0].data, "a" ) + def test_visit_tokens(self): class T(Transformer): def a(self, children): From 99a27663f6fb64c50f61d8b79eb53d2882d0c401 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 20 Nov 2019 21:16:22 +0200 Subject: [PATCH 097/132] Better error message --- lark/parsers/earley_forest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index bbceb42..e6179e6 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -195,7 +195,7 @@ class ForestVisitor(object): continue if id(next_node) in visiting: - raise ParseError("Infinite recursion in 
grammar!") + raise ParseError("Infinite recursion in grammar, in rule '%s'!" % next_node.s.name) input_stack.append(next_node) continue From 86f1bb1db69196f13a288bab54c1ae5966b49c80 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 20 Nov 2019 22:31:20 +0200 Subject: [PATCH 098/132] Improved the reconstructor, but it still feels like a lost cause --- lark/reconstruct.py | 53 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index fb47b93..b7a6659 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -19,9 +19,13 @@ def is_iter_empty(i): except StopIteration: return True + class WriteTokensTransformer(Transformer_InPlace): - def __init__(self, tokens): + "Inserts discarded tokens into their correct place, according to the rules of grammar" + + def __init__(self, tokens, term_subs): self.tokens = tokens + self.term_subs = term_subs def __default__(self, data, children, meta): # if not isinstance(t, MatchTree): @@ -33,10 +37,15 @@ class WriteTokensTransformer(Transformer_InPlace): to_write = [] for sym in meta.orig_expansion: if is_discarded_terminal(sym): - t = self.tokens[sym.name] - if not isinstance(t.pattern, PatternStr): - raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) - to_write.append(t.pattern.value) + try: + v = self.term_subs[sym.name](sym) + except KeyError: + t = self.tokens[sym.name] + if not isinstance(t.pattern, PatternStr): + raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) + + v = t.pattern.value + to_write.append(v) else: x = next(iter_args) if isinstance(x, list): @@ -66,14 +75,34 @@ class MakeMatchTree: t.meta.orig_expansion = self.expansion return t +def best_from_group(seq, group_key, cmp_key): + d = {} + for item in seq: + key = group_key(item) + if key in d: + v1 = cmp_key(item) + v2 = cmp_key(d[key]) + if v2 > v1: + d[key] = item + else: + d[key] = item + return list(d.values()) + class Reconstructor: - def __init__(self, parser): + def __init__(self, parser, term_subs={}): # XXX TODO calling compile twice returns different results! assert parser.options.maybe_placeholders == False tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) - self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}) + self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs) self.rules = list(self._build_recons_rules(rules)) + self.rules.reverse() + # print(len(self.rules)) + self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion)) + # print(len(self.rules)) + + # self.rules = list(set(list(self._build_recons_rules(rules)))) + self.rules.sort(key=lambda r: len(r.expansion)) callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias? 
self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start), self._match, resolve_ambiguity=True) @@ -127,4 +156,12 @@ class Reconstructor: yield item def reconstruct(self, tree): - return ''.join(self._reconstruct(tree)) + x = self._reconstruct(tree) + y = [] + prev_item = '' + for item in x: + if prev_item and item and prev_item[-1].isalnum() and item[0].isalnum(): + y.append(' ') + y.append(item) + prev_item = item + return ''.join(y) From 0e141ec8962cd875e90127bcb94ed1b7b25db5ad Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 21 Nov 2019 10:43:46 +0200 Subject: [PATCH 099/132] Small addition to docs --- docs/classes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/classes.md b/docs/classes.md index 1d59551..021b2f4 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -53,6 +53,7 @@ The main tree class * `data` - The name of the rule or alias * `children` - List of matched sub-rules and terminals * `meta` - Line & Column numbers (unless `propagate_positions` is disabled) + * meta attributes: `line`, `column`, `start_pos`, `end_line`, `end_column`, `end_pos` #### \_\_init\_\_(self, data, children) @@ -98,6 +99,7 @@ When using a lexer, the resulting tokens in the trees will be of the Token class * `column` - The column of the token in the text (starting with 1) * `end_line` - The line where the token ends * `end_column` - The next column after the end of the token. For example, if the token is a single character with a `column` value of 4, `end_column` will be 5. +* `end_pos` - the index where the token ends (basically pos_in_stream + len(token)) ## Transformer ## Visitor From 2de7e347668270a255d4df77a00b4080738e51dd Mon Sep 17 00:00:00 2001 From: Jussi Laasonen Date: Thu, 5 Dec 2019 14:58:27 +0100 Subject: [PATCH 100/132] Open imported grammars with UTF-8 encoding --- lark/load_grammar.py | 5 +++-- tests/grammars/test_unicode.lark | 1 + tests/test_parser.py | 6 ++++++ tests/test_relative_import_unicode.lark | 3 +++ 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 tests/grammars/test_unicode.lark create mode 100644 tests/test_relative_import_unicode.lark diff --git a/lark/load_grammar.py b/lark/load_grammar.py index a65ca1e..bb8fc2f 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -4,6 +4,7 @@ import os.path import sys from ast import literal_eval from copy import copy, deepcopy +from io import open from .utils import bfs from .lexer import Token, TerminalDef, PatternStr, PatternRE @@ -580,13 +581,13 @@ def import_grammar(grammar_path, base_paths=[]): for import_path in import_paths: with suppress(IOError): joined_path = os.path.join(import_path, grammar_path) - with open(joined_path) as f: + with open(joined_path, encoding='utf8') as f: text = f.read() grammar = load_grammar(text, joined_path) _imported_grammars[grammar_path] = grammar break else: - open(grammar_path) + open(grammar_path, encoding='utf8') assert False return _imported_grammars[grammar_path] diff --git a/tests/grammars/test_unicode.lark b/tests/grammars/test_unicode.lark new file mode 100644 index 0000000..9731d0a --- /dev/null +++ b/tests/grammars/test_unicode.lark @@ -0,0 +1 @@ +UNICODE : /[a-zØ-öø-ÿ]/ \ No newline at end of file diff --git a/tests/test_parser.py b/tests/test_parser.py index caee80e..3004041 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1126,6 +1126,12 @@ def _make_parser_test(LEXER, PARSER): self.assertEqual(x.children, ['12', 'lions']) + def test_relative_import_unicode(self): + l = 
_Lark_open('test_relative_import_unicode.lark', rel_to=__file__) + x = l.parse(u'Ø') + self.assertEqual(x.children, [u'Ø']) + + def test_relative_import_rename(self): l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__) x = l.parse('12 lions') diff --git a/tests/test_relative_import_unicode.lark b/tests/test_relative_import_unicode.lark new file mode 100644 index 0000000..8010537 --- /dev/null +++ b/tests/test_relative_import_unicode.lark @@ -0,0 +1,3 @@ +start: UNICODE + +%import .grammars.test_unicode.UNICODE \ No newline at end of file From 616d2339b062e5fa93a995f6c58cc0ad283c141a Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 11 Dec 2019 12:47:56 +0200 Subject: [PATCH 101/132] Update README.md --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b9a1bda..78107de 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,16 @@ Lark has no dependencies. [![Build Status](https://travis-ci.org/lark-parser/lark.svg?branch=master)](https://travis-ci.org/lark-parser/lark) -### Syntax Highlighting (new) +### Syntax Highlighting -Lark now provides syntax highlighting for its grammar files (\*.lark): +Lark provides syntax highlighting for its grammar files (\*.lark): - [Sublime Text & TextMate](https://github.com/lark-parser/lark_syntax) - [vscode](https://github.com/lark-parser/vscode-lark) +### Clones + +- [Lerchen (Julia)](https://github.com/jamesrhester/Lerchen.jl) - an unofficial clone, written entirely in Julia. ### Hello World From d693a172323488c9ba11796c0a48ef39ace79a3a Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 12 Dec 2019 09:24:51 +0200 Subject: [PATCH 102/132] Fixed link to Lerche --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78107de..84e4921 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Lark provides syntax highlighting for its grammar files (\*.lark): ### Clones -- [Lerchen (Julia)](https://github.com/jamesrhester/Lerchen.jl) - an unofficial clone, written entirely in Julia. +- [Lerche (Julia)](https://github.com/jamesrhester/Lerche.jl) - an unofficial clone, written entirely in Julia. ### Hello World From e1a39c58d0a91b99777e954a6ba7573afae140f8 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 8 Dec 2019 19:13:14 +0200 Subject: [PATCH 103/132] Refactored v_args & visitors to a better, more agile implementation --- lark/parse_tree_builder.py | 23 +++++++++++---- lark/visitors.py | 53 +++++++++++++++++++++++----------- tests/test_parser.py | 58 +++++++++++++++++++++++++++++++++++++- 3 files changed, 111 insertions(+), 23 deletions(-) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 3c47ef0..b50da43 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -3,6 +3,7 @@ from .lexer import Token from .tree import Tree from .visitors import InlineTransformer # XXX Deprecated from .visitors import Transformer_InPlace +from . 
import visitors ###{standalone from functools import partial, wraps @@ -202,6 +203,15 @@ def inplace_transformer(func): return func(tree) return f +def apply_visit_wrapper(func, name, wrapper): + if wrapper is visitors._vargs_meta or wrapper is visitors._vargs_meta_inline: + raise NotImplementedError("Meta args not supported for internal transformer") + @wraps(func) + def f(children): + return wrapper(func, name, children, None) + return f + + class ParseTreeBuilder: def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): self.tree_class = tree_class @@ -236,12 +246,15 @@ class ParseTreeBuilder: user_callback_name = rule.alias or rule.origin.name try: f = getattr(transformer, user_callback_name) - assert not getattr(f, 'meta', False), "Meta args not supported for internal transformer" # XXX InlineTransformer is deprecated! - if getattr(f, 'inline', False) or isinstance(transformer, InlineTransformer): - f = ptb_inline_args(f) - elif hasattr(f, 'whole_tree') or isinstance(transformer, Transformer_InPlace): - f = inplace_transformer(f) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + f = apply_visit_wrapper(f, user_callback_name, wrapper) + else: + if isinstance(transformer, InlineTransformer): + f = ptb_inline_args(f) + elif isinstance(transformer, Transformer_InPlace): + f = inplace_transformer(f) except AttributeError: f = partial(self.tree_class, user_callback_name) diff --git a/lark/visitors.py b/lark/visitors.py index a2d5e86..da6b1d5 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -35,14 +35,9 @@ class Transformer: return self.__default__(tree.data, children, tree.meta) else: try: - if getattr(f, 'meta', False): - return f(children, tree.meta) - elif getattr(f, 'inline', False): - return f(*children) - elif getattr(f, 'whole_tree', False): - if new_children is not None: - tree.children = new_children - return f(tree) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, children, tree.meta) else: return f(children) except (GrammarError, Discard): @@ -282,8 +277,7 @@ def inline_args(obj): # XXX Deprecated -def _visitor_args_func_dec(func, inline=False, meta=False, whole_tree=False, static=False): - assert [whole_tree, meta, inline].count(True) <= 1 +def _visitor_args_func_dec(func, visit_wrapper=None, static=False): def create_decorator(_f, with_self): if with_self: def f(self, *args, **kwargs): @@ -298,17 +292,42 @@ def _visitor_args_func_dec(func, inline=False, meta=False, whole_tree=False, sta else: f = smart_decorator(func, create_decorator) f.vargs_applied = True - f.inline = inline - f.meta = meta - f.whole_tree = whole_tree + f.visit_wrapper = visit_wrapper return f -def v_args(inline=False, meta=False, tree=False): + +def _vargs_inline(f, data, children, meta): + return f(*children) +def _vargs_meta_inline(f, data, children, meta): + return f(meta, *children) +def _vargs_meta(f, data, children, meta): + return f(children, meta) # TODO swap these for consistency? Backwards incompatible! +def _vargs_tree(f, data, children, meta): + return f(Tree(data, children, meta)) + +def v_args(inline=False, meta=False, tree=False, wrapper=None): "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods" - if [tree, meta, inline].count(True) > 1: - raise ValueError("Visitor functions can either accept tree, or meta, or be inlined. 
These cannot be combined.") + if tree and (meta or inline): + raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") + + func = None + if meta: + if inline: + func = _vargs_meta_inline + else: + func = _vargs_meta + elif inline: + func = _vargs_inline + elif tree: + func = _vargs_tree + + if wrapper is not None: + if func is not None: + raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.") + func = wrapper + def _visitor_args_dec(obj): - return _apply_decorator(obj, _visitor_args_func_dec, inline=inline, meta=meta, whole_tree=tree) + return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func) return _visitor_args_dec diff --git a/tests/test_parser.py b/tests/test_parser.py index 3004041..9a96305 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -5,6 +5,7 @@ import unittest import logging import os import sys +from copy import deepcopy try: from cStringIO import StringIO as cStringIO except ImportError: @@ -117,6 +118,61 @@ class TestParsers(unittest.TestCase): r = p.parse("x") self.assertEqual( r.children, ["X!"] ) + def test_vargs_meta(self): + + @v_args(meta=True) + class T1(Transformer): + def a(self, children, meta): + assert not children + return meta.line + + def start(self, children, meta): + return children + + @v_args(meta=True, inline=True) + class T2(Transformer): + def a(self, meta): + return meta.line + + def start(self, meta, *res): + return list(res) + + for T in (T1, T2): + for internal in [False, True]: + try: + g = Lark(r"""start: a+ + a : "x" _NL? + _NL: /\n/+ + """, parser='lalr', transformer=T() if internal else None) + except NotImplementedError: + assert internal + continue + + res = g.parse("xx\nx\nxxx\n\n\nxx") + assert not internal + res = T().transform(res) + + self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6]) + + def test_vargs_tree(self): + tree = Lark(''' + start: a a a + !a: "A" + ''').parse('AAA') + tree_copy = deepcopy(tree) + + @v_args(tree=True) + class T(Transformer): + def a(self, tree): + return 1 + def start(self, tree): + return tree.children + + res = T().transform(tree) + self.assertEqual(res, [1, 1, 1]) + self.assertEqual(tree, tree_copy) + + def test_embedded_transformer(self): class T(Transformer): @@ -188,7 +244,7 @@ class TestParsers(unittest.TestCase): @v_args(tree=True) class T2(Transformer): def a(self, tree): - assert isinstance(tree, Tree) + assert isinstance(tree, Tree), tree tree.children.append("tested") return tree From 8842928963d265e35b9da2c1e2a2acadbee4151a Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 13 Dec 2019 09:30:41 +0200 Subject: [PATCH 104/132] Fixed multithreading bug in ContextualLexer (Issue #493) --- lark/lexer.py | 19 +++++++------------ lark/parser_frontends.py | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/lark/lexer.py b/lark/lexer.py index 871b25e..9d26318 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -288,10 +288,7 @@ class Lexer(object): Method Signatures: lex(self, stream) -> Iterator[Token] - - set_parser_state(self, state) # Optional """ - set_parser_state = NotImplemented lex = NotImplemented @@ -349,6 +346,7 @@ class TraditionalLexer(Lexer): class ContextualLexer(Lexer): + def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}): tokens_by_name = {} for t in terminals: @@ -371,18 +369,15 @@ class ContextualLexer(Lexer): self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks) - self.set_parser_state(None) # Needs to be 
set on the outside - - def set_parser_state(self, state): - self.parser_state = state - - def lex(self, stream): - l = _Lex(self.lexers[self.parser_state], self.parser_state) + def lex(self, stream, get_parser_state): + parser_state = get_parser_state() + l = _Lex(self.lexers[parser_state], parser_state) try: for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): yield x - l.lexer = self.lexers[self.parser_state] - l.state = self.parser_state + parser_state = get_parser_state() + l.lexer = self.lexers[parser_state] + l.state = parser_state # For debug only, no need to worry about multithreading except UnexpectedCharacters as e: # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, # but not in the current context. diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index ec82299..8b42772 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -79,14 +79,13 @@ class WithLexer(_ParserFrontend): def _serialize(self, data, memo): data['parser'] = data['parser'].serialize(memo) - def lex(self, text): - stream = self.lexer.lex(text) + def lex(self, *args): + stream = self.lexer.lex(*args) return self.postlex.process(stream) if self.postlex else stream def parse(self, text, start=None): token_stream = self.lex(text) - sps = self.lexer.set_parser_state - return self._parse(token_stream, start, *[sps] if sps is not NotImplemented else []) + return self._parse(token_stream, start) def init_traditional_lexer(self): self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks) @@ -114,6 +113,15 @@ class LALR_ContextualLexer(LALR_WithLexer): ignore=self.lexer_conf.ignore, always_accept=always_accept, user_callbacks=self.lexer_conf.callbacks) + + + def parse(self, text, start=None): + parser_state = [None] + def set_parser_state(s): + parser_state[0] = s + + token_stream = self.lex(text, lambda: parser_state[0]) + return self._parse(token_stream, start, set_parser_state) ###} class LALR_CustomLexer(LALR_WithLexer): From 0f699b1ebbe1487ecf221ec9e7eb4e37a2283b10 Mon Sep 17 00:00:00 2001 From: Ted Summer Date: Tue, 17 Dec 2019 09:37:28 -0700 Subject: [PATCH 105/132] chore(docs): default for propagate_positions=False --- docs/classes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/classes.md b/docs/classes.md index 021b2f4..6ec9dcc 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -32,7 +32,7 @@ The Lark class accepts a grammar string or file object, and keyword options: * **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`) -* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`) +* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`False`) * **maybe_placeholders** - The `[]` operator returns `None` when not matched. 
Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`) From 9a0e7af4e23d51965afdda68d78c116d13a88de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=A9stor=20N=C3=A1poles?= Date: Wed, 18 Dec 2019 16:34:05 -0500 Subject: [PATCH 106/132] Adding one more project using Lark --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 84e4921..d38546f 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail - [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer + - [harmalysis](https://github.com/napulen/harmalysis) - A language for harmonic analysis and music theory Using Lark? Send me a message and I'll add your project! From dcc9d46eef56dd0fb9633a2c3dc7c223f347baeb Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 25 Dec 2019 10:51:04 +0200 Subject: [PATCH 107/132] Fixes to propagate_positions --- lark/parse_tree_builder.py | 4 ++-- tests/__main__.py | 2 +- tests/test_parser.py | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index b50da43..6d298f4 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -29,7 +29,7 @@ class PropagatePositions: if isinstance(res, Tree): for c in children: - if isinstance(c, Tree) and c.children and not c.meta.empty: + if isinstance(c, Tree) and not c.meta.empty: res.meta.line = c.meta.line res.meta.column = c.meta.column res.meta.start_pos = c.meta.start_pos @@ -43,7 +43,7 @@ class PropagatePositions: break for c in reversed(children): - if isinstance(c, Tree) and c.children and not c.meta.empty: + if isinstance(c, Tree) and not c.meta.empty: res.meta.end_line = c.meta.end_line res.meta.end_column = c.meta.end_column res.meta.end_pos = c.meta.end_pos diff --git a/tests/__main__.py b/tests/__main__.py index 901f101..477789f 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -10,7 +10,7 @@ from .test_reconstructor import TestReconstructor try: from .test_nearley.test_nearley import TestNearley except ImportError: - logging.warn("Warning: Skipping tests for Nearley (js2py required)") + logging.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") # from .test_selectors import TestSelectors # from .test_grammars import TestPythonG, TestConfigG diff --git a/tests/test_parser.py b/tests/test_parser.py index 9a96305..3f73990 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -63,6 +63,14 @@ class TestParsers(unittest.TestCase): r = g.parse('a') self.assertEqual( r.children[0].meta.line, 1 ) + g = Lark("""start: x + x: a + a: "a" + """, propagate_positions=True) + + r = g.parse('a') + self.assertEqual( r.children[0].meta.line, 1 ) + def test_expand1(self): g = Lark("""start: a From f7a6366b6c2e3a6963bceb11a4e0cdbd5a41b7c2 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 25 Dec 2019 11:19:10 +0200 Subject: [PATCH 108/132] Make the JSON parser fast again --- examples/json_parser.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/json_parser.py b/examples/json_parser.py index ba1ff1e..7aa7d0f 100644 --- a/examples/json_parser.py +++ 
b/examples/json_parser.py @@ -49,11 +49,21 @@ class TreeToJson(Transformer): false = lambda self, _: False +### Create the JSON parser with Lark, using the Earley algorithm # json_parser = Lark(json_grammar, parser='earley', lexer='standard') # def parse(x): # return TreeToJson().transform(json_parser.parse(x)) -json_parser = Lark(json_grammar, parser='lalr', lexer='standard', transformer=TreeToJson()) +### Create the JSON parser with Lark, using the LALR algorithm +json_parser = Lark(json_grammar, parser='lalr', + # Using the standard lexer isn't required, and isn't usually recommended. + # But, it's good enough for JSON, and it's slightly faster. + lexer='standard', + # Disabling propagate_positions and placeholders slightly improves speed + propagate_positions=False, + maybe_placeholders=False, + # Using an internal transformer is faster and more memory efficient + transformer=TreeToJson()) parse = json_parser.parse From fbbea5f73093dea5de4867b92831d44c38f60497 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 25 Dec 2019 11:27:29 +0200 Subject: [PATCH 109/132] Removed deprecated feature - profile --- lark/lark.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index d334cc7..6e51914 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -44,7 +44,6 @@ class LarkOptions(Serialize): cache_grammar - Cache the Lark grammar (Default: False) postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. start - The start symbol, either a string, or a list of strings for multiple possible starts (Default: "start") - profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False) priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. 
@@ -63,7 +62,6 @@ class LarkOptions(Serialize): 'lexer': 'auto', 'transformer': None, 'start': 'start', - 'profile': False, 'priority': 'auto', 'ambiguity': 'auto', 'propagate_positions': True, @@ -114,30 +112,6 @@ class LarkOptions(Serialize): return cls(data) -class Profiler: - def __init__(self): - self.total_time = defaultdict(float) - self.cur_section = '__init__' - self.last_enter_time = time.time() - - def enter_section(self, name): - cur_time = time.time() - self.total_time[self.cur_section] += cur_time - self.last_enter_time - self.last_enter_time = cur_time - self.cur_section = name - - def make_wrapper(self, name, f): - def wrapper(*args, **kwargs): - last_section = self.cur_section - self.enter_section(name) - try: - return f(*args, **kwargs) - finally: - self.enter_section(last_section) - - return wrapper - - class Lark(Serialize): def __init__(self, grammar, **options): """ @@ -165,9 +139,6 @@ class Lark(Serialize): if self.options.cache_grammar: raise NotImplementedError("Not available yet") - assert not self.options.profile, "Feature temporarily disabled" - # self.profiler = Profiler() if self.options.profile else None - if self.options.lexer == 'auto': if self.options.parser == 'lalr': self.options.lexer = 'contextual' From b9c81a54508baf7fe8f96aac7364dfa00608ff42 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 25 Dec 2019 11:39:09 +0200 Subject: [PATCH 110/132] Refactor: Simplify code by assuming rule.options is never None --- examples/standalone/json_parser.py | 9 ++++----- lark/grammar.py | 2 +- lark/lark.py | 4 ++-- lark/load_grammar.py | 4 ++-- lark/parse_tree_builder.py | 6 +++--- lark/parsers/cyk.py | 2 +- lark/parsers/earley.py | 2 +- lark/parsers/earley_forest.py | 2 +- lark/parsers/lalr_parser.py | 3 +-- lark/reconstruct.py | 2 +- 10 files changed, 17 insertions(+), 19 deletions(-) diff --git a/examples/standalone/json_parser.py b/examples/standalone/json_parser.py index d424f1b..73acf9c 100644 --- a/examples/standalone/json_parser.py +++ b/examples/standalone/json_parser.py @@ -1305,8 +1305,7 @@ class ParseTreeBuilder: class LALR_Parser(object): def __init__(self, parser_conf, debug=False): - assert all(r.options is None or r.options.priority is None - for r in parser_conf.rules), "LALR doesn't yet support prioritization" + assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) analysis.compute_lookahead() callbacks = parser_conf.callbacks @@ -1508,7 +1507,7 @@ class WithLexer(Serialize): inst.postlex = postlex inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) return inst - + def _serialize(self, data, memo): data['parser'] = data['parser'].serialize(memo) @@ -1740,14 +1739,14 @@ class Lark(Serialize): # This replaces the old 'resolve__antiscore_sum' option. if self.options.priority == 'invert': for rule in self.rules: - if rule.options and rule.options.priority is not None: + if rule.options.priority is not None: rule.options.priority = -rule.options.priority # Else, if the user asked to disable priorities, strip them from the # rules. This allows the Earley parsers to skip an extra forest walk # for improved performance, if you don't need them (or didn't specify any). 
elif self.options.priority == None: for rule in self.rules: - if rule.options and rule.options.priority is not None: + if rule.options.priority is not None: rule.options.priority = None self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks) diff --git a/lark/grammar.py b/lark/grammar.py index 91435b2..cf8cf64 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -81,7 +81,7 @@ class Rule(Serialize): self.expansion = expansion self.alias = alias self.order = order - self.options = options + self.options = options or RuleOptions() self._hash = hash((self.origin, tuple(self.expansion))) def _deserialize(self): diff --git a/lark/lark.py b/lark/lark.py index 6e51914..36cb4b6 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -187,14 +187,14 @@ class Lark(Serialize): # This replaces the old 'resolve__antiscore_sum' option. if self.options.priority == 'invert': for rule in self.rules: - if rule.options and rule.options.priority is not None: + if rule.options.priority is not None: rule.options.priority = -rule.options.priority # Else, if the user asked to disable priorities, strip them from the # rules. This allows the Earley parsers to skip an extra forest walk # for improved performance, if you don't need them (or didn't specify any). elif self.options.priority == None: for rule in self.rules: - if rule.options and rule.options.priority is not None: + if rule.options.priority is not None: rule.options.priority = None # TODO Deprecate lexer_callbacks? diff --git a/lark/load_grammar.py b/lark/load_grammar.py index bb8fc2f..2cd834c 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -503,7 +503,7 @@ class Grammar: ebnf_to_bnf = EBNF_to_BNF() rules = [] for name, rule_tree, options in rule_defs: - ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None + ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None tree = transformer.transform(rule_tree) res = ebnf_to_bnf.transform(tree) rules.append((name, res, options)) @@ -527,7 +527,7 @@ class Grammar: empty_indices = [x==_EMPTY for x in expansion] if any(empty_indices): - exp_options = copy(options) if options else RuleOptions() + exp_options = copy(options) exp_options.empty_indices = empty_indices expansion = [x for x in expansion if x!=_EMPTY] else: diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 6d298f4..4ee0071 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -225,12 +225,12 @@ class ParseTreeBuilder: def _init_builders(self, rules): for rule in rules: options = rule.options - keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) - expand_single_child = options.expand1 if options else False + keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens + expand_single_child = options.expand1 wrapper_chain = list(filter(None, [ (expand_single_child and not rule.alias) and ExpandSingleChild, - maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None), + maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), self.propagate_positions and PropagatePositions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), ])) diff --git a/lark/parsers/cyk.py 
b/lark/parsers/cyk.py index 7b25609..ff0924f 100644 --- a/lark/parsers/cyk.py +++ b/lark/parsers/cyk.py @@ -96,7 +96,7 @@ class Parser(object): assert all(isinstance(x, Symbol) for x in lark_rule.expansion) return Rule( lark_rule.origin, lark_rule.expansion, - weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, + weight=lark_rule.options.priority if lark_rule.options.priority else 0, alias=lark_rule) def parse(self, tokenized, start): # pylint: disable=invalid-name diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index e18d26c..a4ffead 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -45,7 +45,7 @@ class Parser: # the priorities will be stripped from all rules before they reach us, allowing us to # skip the extra tree walk. We'll also skip this if the user just didn't specify priorities # on any rules. - if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None: + if self.forest_sum_visitor is None and rule.options.priority is not None: self.forest_sum_visitor = ForestSumVisitor self.term_matcher = term_matcher diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index e6179e6..c8b4f25 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -250,7 +250,7 @@ class ForestSumVisitor(ForestVisitor): return iter(node.children) def visit_packed_node_out(self, node): - priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options and node.rule.options.priority else 0 + priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options.priority else 0 priority += getattr(node.right, 'priority', 0) priority += getattr(node.left, 'priority', 0) node.priority = priority diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 82c8bba..4265ca5 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -12,8 +12,7 @@ from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable ###{standalone class LALR_Parser(object): def __init__(self, parser_conf, debug=False): - assert all(r.options is None or r.options.priority is None - for r in parser_conf.rules), "LALR doesn't yet support prioritization" + assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) analysis.compute_lalr() callbacks = parser_conf.callbacks diff --git a/lark/reconstruct.py b/lark/reconstruct.py index b7a6659..bd7b6a0 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -108,7 +108,7 @@ class Reconstructor: self._match, resolve_ambiguity=True) def _build_recons_rules(self, rules): - expand1s = {r.origin for r in rules if r.options and r.options.expand1} + expand1s = {r.origin for r in rules if r.options.expand1} aliases = defaultdict(list) for r in rules: From f0da22e9a849bd700b67bd550a11b1eefb3235be Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 26 Dec 2019 19:42:01 +0200 Subject: [PATCH 111/132] LarkOptions now raises AttributeError instead of KeyError (Issue #503) --- lark/lark.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lark/lark.py b/lark/lark.py index 36cb4b6..ea4f46a 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -99,7 +99,11 @@ class LarkOptions(Serialize): raise ValueError("Unknown options: %s" % o.keys()) def __getattr__(self, name): - return self.options[name] + try: + return self.options[name] + except KeyError as e: + 
raise AttributeError(e) + def __setattr__(self, name, value): assert name in self.options self.options[name] = value From b2f1b3bf7c63d980f69025d467064ba504f6a279 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 11 Jan 2020 16:05:29 +0200 Subject: [PATCH 112/132] Small fixes --- examples/standalone/json_parser.py | 608 ++++++++++++++++++++--------- lark/lark.py | 2 - lark/lexer.py | 2 +- lark/load_grammar.py | 35 +- lark/parsers/lalr_analysis.py | 2 +- lark/tools/standalone.py | 3 + lark/utils.py | 26 ++ 7 files changed, 451 insertions(+), 227 deletions(-) diff --git a/examples/standalone/json_parser.py b/examples/standalone/json_parser.py index 73acf9c..f270ade 100644 --- a/examples/standalone/json_parser.py +++ b/examples/standalone/json_parser.py @@ -1,4 +1,4 @@ -# The file was automatically generated by Lark v0.7.0 +# The file was automatically generated by Lark v0.8.0rc1 # # # Lark Stand-alone Generator Tool @@ -35,6 +35,9 @@ # # +import os +from io import open + class LarkError(Exception): pass @@ -47,6 +50,14 @@ class ParseError(LarkError): class LexError(LarkError): pass +class UnexpectedEOF(ParseError): + def __init__(self, expected): + self.expected = expected + + message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected)) + super(UnexpectedEOF, self).__init__(message) + + class UnexpectedInput(LarkError): pos_in_stream = None @@ -86,7 +97,7 @@ class UnexpectedInput(LarkError): class UnexpectedCharacters(LexError, UnexpectedInput): - def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None): + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) self.line = line @@ -99,6 +110,8 @@ class UnexpectedCharacters(LexError, UnexpectedInput): message += '\n\n' + self.get_context(seq) if allowed: message += '\nExpecting: %s\n' % allowed + if token_history: + message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history) super(UnexpectedCharacters, self).__init__(message) @@ -121,13 +134,25 @@ class UnexpectedToken(ParseError, UnexpectedInput): super(UnexpectedToken, self).__init__(message) class VisitError(LarkError): - def __init__(self, tree, orig_exc): - self.tree = tree + def __init__(self, rule, obj, orig_exc): + self.obj = obj self.orig_exc = orig_exc - message = 'Error trying to process rule "%s":\n\n%s' % (tree.data, orig_exc) + message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) super(VisitError, self).__init__(message) +def classify(seq, key=None, value=None): + d = {} + for item in seq: + k = key(item) if (key is not None) else item + v = value(item) if (value is not None) else item + if k in d: + d[k].append(v) + else: + d[k] = [v] + return d + + def _deserialize(data, namespace, memo): if isinstance(data, dict): if '__type__' in data: # Object @@ -170,7 +195,10 @@ class Serialize(object): inst = cls.__new__(cls) for f in fields: - setattr(inst, f, _deserialize(data[f], namespace, memo)) + try: + setattr(inst, f, _deserialize(data[f], namespace, memo)) + except KeyError as e: + raise KeyError("Cannot find key for class", cls, e) postprocess = getattr(inst, '_deserialize', None) if postprocess: postprocess() @@ -224,7 +252,7 @@ def smart_decorator(f, create_decorator): elif isinstance(f, partial): # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 - return 
create_decorator(f.__func__, True) + return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True)) else: return create_decorator(f.__func__.__call__, True) @@ -232,6 +260,15 @@ def smart_decorator(f, create_decorator): import sys, re Py36 = (sys.version_info[:2] >= (3, 6)) +import sre_parse +import sre_constants +def get_regexp_width(regexp): + try: + return [int(x) for x in sre_parse.parse(regexp).getwidth()] + except sre_constants.error: + raise ValueError(regexp) + + class Meta: def __init__(self): self.empty = True @@ -282,6 +319,36 @@ class Tree(object): def __hash__(self): return hash((self.data, tuple(self.children))) + def iter_subtrees(self): + # TODO: Re-write as a more efficient version + + visited = set() + q = [self] + + l = [] + while q: + subtree = q.pop() + l.append( subtree ) + if id(subtree) in visited: + continue # already been here from another branch + visited.add(id(subtree)) + q += [c for c in subtree.children if isinstance(c, Tree)] + + seen = set() + for x in reversed(l): + if id(x) not in seen: + yield x + seen.add(id(x)) + + def find_pred(self, pred): + "Find all nodes where pred(tree) == True" + return filter(pred, self.iter_subtrees()) + + def find_data(self, data): + "Find all nodes where tree.data == data" + return self.find_pred(lambda t: t.data == data) + + from inspect import getmembers, getmro class Discard(Exception): @@ -298,6 +365,10 @@ class Transformer: Can be used to implement map or reduce. """ + __visit_tokens__ = True # For backwards compatibility + def __init__(self, visit_tokens=True): + self.__visit_tokens__ = visit_tokens + def _call_userfunc(self, tree, new_children=None): # Assumes tree is already transformed children = new_children if new_children is not None else tree.children @@ -307,25 +378,39 @@ class Transformer: return self.__default__(tree.data, children, tree.meta) else: try: - if getattr(f, 'meta', False): - return f(children, tree.meta) - elif getattr(f, 'inline', False): - return f(*children) - elif getattr(f, 'whole_tree', False): - if new_children is not None: - raise NotImplementedError("Doesn't work with the base Transformer class") - return f(tree) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, children, tree.meta) else: return f(children) except (GrammarError, Discard): raise except Exception as e: - raise VisitError(tree, e) + raise VisitError(tree.data, tree, e) + + def _call_userfunc_token(self, token): + try: + f = getattr(self, token.type) + except AttributeError: + return self.__default_token__(token) + else: + try: + return f(token) + except (GrammarError, Discard): + raise + except Exception as e: + raise VisitError(token.type, token, e) + def _transform_children(self, children): for c in children: try: - yield self._transform_tree(c) if isinstance(c, Tree) else c + if isinstance(c, Tree): + yield self._transform_tree(c) + elif self.__visit_tokens__ and isinstance(c, Token): + yield self._call_userfunc_token(c) + else: + yield c except Discard: pass @@ -343,13 +428,20 @@ class Transformer: "Default operation on tree (for override)" return Tree(data, children, meta) + def __default_token__(self, token): + "Default operation on token (for override)" + return token + + @classmethod def _apply_decorator(cls, decorator, **kwargs): mro = getmro(cls) assert mro[0] is cls libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} for name, value in getmembers(cls): - if name.startswith('_') or name in libmembers: + + # Make 
sure the function isn't inherited (unless it's overwritten) + if name.startswith('_') or (name in libmembers and name not in cls.__dict__): continue if not callable(cls.__dict__[name]): continue @@ -432,6 +524,11 @@ class Visitor(VisitorBase): self._call_userfunc(subtree) return tree + def visit_topdown(self,tree): + for subtree in tree.iter_subtrees_topdown(): + self._call_userfunc(subtree) + return tree + class Visitor_Recursive(VisitorBase): """Bottom-up visitor, recursive @@ -444,8 +541,16 @@ class Visitor_Recursive(VisitorBase): if isinstance(child, Tree): self.visit(child) - f = getattr(self, tree.data, self.__default__) - f(tree) + self._call_userfunc(tree) + return tree + + def visit_topdown(self,tree): + self._call_userfunc(tree) + + for child in tree.children: + if isinstance(child, Tree): + self.visit_topdown(child) + return tree @@ -515,8 +620,7 @@ def inline_args(obj): # XXX Deprecated -def _visitor_args_func_dec(func, inline=False, meta=False, whole_tree=False, static=False): - assert [whole_tree, meta, inline].count(True) <= 1 +def _visitor_args_func_dec(func, visit_wrapper=None, static=False): def create_decorator(_f, with_self): if with_self: def f(self, *args, **kwargs): @@ -531,17 +635,42 @@ def _visitor_args_func_dec(func, inline=False, meta=False, whole_tree=False, sta else: f = smart_decorator(func, create_decorator) f.vargs_applied = True - f.inline = inline - f.meta = meta - f.whole_tree = whole_tree + f.visit_wrapper = visit_wrapper return f -def v_args(inline=False, meta=False, tree=False): + +def _vargs_inline(f, data, children, meta): + return f(*children) +def _vargs_meta_inline(f, data, children, meta): + return f(meta, *children) +def _vargs_meta(f, data, children, meta): + return f(children, meta) # TODO swap these for consistency? Backwards incompatible! +def _vargs_tree(f, data, children, meta): + return f(Tree(data, children, meta)) + +def v_args(inline=False, meta=False, tree=False, wrapper=None): "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods" - if [tree, meta, inline].count(True) > 1: - raise ValueError("Visitor functions can either accept tree, or meta, or be inlined. 
These cannot be combined.") + if tree and (meta or inline): + raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") + + func = None + if meta: + if inline: + func = _vargs_meta_inline + else: + func = _vargs_meta + elif inline: + func = _vargs_inline + elif tree: + func = _vargs_tree + + if wrapper is not None: + if func is not None: + raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.") + func = wrapper + def _visitor_args_dec(obj): - return _apply_decorator(obj, _visitor_args_func_dec, inline=inline, meta=meta, whole_tree=tree) + return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func) return _visitor_args_dec @@ -604,6 +733,8 @@ class Indenter: class Symbol(Serialize): + __slots__ = ('name',) + is_term = NotImplemented def __init__(self, name): @@ -680,7 +811,7 @@ class Rule(Serialize): self.expansion = expansion self.alias = alias self.order = order - self.options = options + self.options = options or RuleOptions() self._hash = hash((self.origin, tuple(self.expansion))) def _deserialize(self): @@ -705,7 +836,6 @@ class Rule(Serialize): class Pattern(Serialize): - __serialize_fields__ = 'value', 'flags' def __init__(self, value, flags=()): self.value = value @@ -738,6 +868,10 @@ class Pattern(Serialize): class PatternStr(Pattern): + __serialize_fields__ = 'value', 'flags' + + type = "str" + def to_regexp(self): return self._get_flags(re.escape(self.value)) @@ -747,15 +881,25 @@ class PatternStr(Pattern): max_width = min_width class PatternRE(Pattern): + __serialize_fields__ = 'value', 'flags', '_width' + + type = "re" + def to_regexp(self): return self._get_flags(self.value) + _width = None + def _get_width(self): + if self._width is None: + self._width = get_regexp_width(self.to_regexp()) + return self._width + @property def min_width(self): - return get_regexp_width(self.to_regexp())[0] + return self._get_width()[0] @property def max_width(self): - return get_regexp_width(self.to_regexp())[1] + return self._get_width()[1] class TerminalDef(Serialize): @@ -774,9 +918,9 @@ class TerminalDef(Serialize): class Token(Str): - __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column') + __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') - def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None): + def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): try: self = super(Token, cls).__new__(cls, value) except UnicodeDecodeError: @@ -790,11 +934,19 @@ class Token(Str): self.column = column self.end_line = end_line self.end_column = end_column + self.end_pos = end_pos return self + def update(self, type_=None, value=None): + return Token.new_borrow_pos( + type_ if type_ is not None else self.type, + value if value is not None else self.value, + self + ) + @classmethod def new_borrow_pos(cls, type_, value, borrow_t): - return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column) + return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) def __reduce__(self): return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) @@ -846,38 +998,38 @@ class _Lex: newline_types = frozenset(newline_types) ignore_types = frozenset(ignore_types) line_ctr = LineCounter() 
+ last_token = None while line_ctr.char_pos < len(stream): lexer = self.lexer - for mre, type_from_index in lexer.mres: - m = mre.match(stream, line_ctr.char_pos) - if not m: - continue - - t = None - value = m.group(0) - type_ = type_from_index[m.lastindex] - if type_ not in ignore_types: - t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - if t.type in lexer.callback: - t = lexer.callback[t.type](t) - if not isinstance(t, Token): - raise ValueError("Callbacks must return a token (returned %r)" % t) - yield t - else: - if type_ in lexer.callback: - t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) - lexer.callback[type_](t) + res = lexer.match(stream, line_ctr.char_pos) + if not res: + allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types + if not allowed: + allowed = {""} + raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token]) - line_ctr.feed(value, type_ in newline_types) - if t: - t.end_line = line_ctr.line - t.end_column = line_ctr.column + value, type_ = res - break + if type_ not in ignore_types: + t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + line_ctr.feed(value, type_ in newline_types) + t.end_line = line_ctr.line + t.end_column = line_ctr.column + t.end_pos = line_ctr.char_pos + if t.type in lexer.callback: + t = lexer.callback[t.type](t) + if not isinstance(t, Token): + raise ValueError("Callbacks must return a token (returned %r)" % t) + yield t + last_token = t else: - allowed = [v for m, tfi in lexer.mres for v in tfi.values()] - raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state) + if type_ in lexer.callback: + t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + lexer.callback[type_](t2) + line_ctr.feed(value, type_ in newline_types) + + class UnlessCallback: @@ -950,34 +1102,25 @@ def build_mres(terminals, match_whole=False): return _build_mres(terminals, len(terminals), match_whole) def _regexp_has_newline(r): - """Expressions that may indicate newlines in a regexp: + r"""Expressions that may indicate newlines in a regexp: - newlines (\n) - escaped newline (\\n) - anything but ([^...]) - any-char (.) when the flag (?s) exists + - spaces (\s) """ - return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r) + return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' 
in r) -class Lexer(Serialize): +class Lexer(object): """Lexer interface Method Signatures: lex(self, stream) -> Iterator[Token] - - set_parser_state(self, state) # Optional """ - set_parser_state = NotImplemented lex = NotImplemented class TraditionalLexer(Lexer): - __serialize_fields__ = 'terminals', 'ignore_types', 'newline_types' - __serialize_namespace__ = TerminalDef, - - def _deserialize(self): - self.mres = build_mres(self.terminals) - self.callback = {} # TODO implement - def __init__(self, terminals, ignore=(), user_callbacks={}): assert all(isinstance(t, TerminalDef) for t in terminals), terminals @@ -988,7 +1131,7 @@ class TraditionalLexer(Lexer): for t in terminals: try: re.compile(t.pattern.to_regexp()) - except: + except re.error: raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) if t.pattern.min_width == 0: @@ -1001,21 +1144,28 @@ class TraditionalLexer(Lexer): self.ignore_types = list(ignore) terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) + self.terminals = terminals + self.user_callbacks = user_callbacks + self.build() - terminals, self.callback = _create_unless(terminals) + def build(self): + terminals, self.callback = _create_unless(self.terminals) assert all(self.callback.values()) - for type_, f in user_callbacks.items(): + for type_, f in self.user_callbacks.items(): if type_ in self.callback: # Already a callback there, probably UnlessCallback self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_) else: self.callback[type_] = f - self.terminals = terminals - self.mres = build_mres(terminals) + def match(self, stream, pos): + for mre, type_from_index in self.mres: + m = mre.match(stream, pos) + if m: + return m.group(0), type_from_index[m.lastindex] def lex(self, stream): return _Lex(self).lex(stream, self.newline_types, self.ignore_types) @@ -1024,8 +1174,6 @@ class TraditionalLexer(Lexer): class ContextualLexer(Lexer): - __serialize_fields__ = 'root_lexer', 'lexers' - __serialize_namespace__ = TraditionalLexer, def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}): tokens_by_name = {} @@ -1049,17 +1197,41 @@ class ContextualLexer(Lexer): self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks) - self.set_parser_state(None) # Needs to be set on the outside + def lex(self, stream, get_parser_state): + parser_state = get_parser_state() + l = _Lex(self.lexers[parser_state], parser_state) + try: + for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): + yield x + parser_state = get_parser_state() + l.lexer = self.lexers[parser_state] + l.state = parser_state # For debug only, no need to worry about multithreading + except UnexpectedCharacters as e: + # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, + # but not in the current context. + # This tests the input against the global context, to provide a nicer error. 
+ root_match = self.root_lexer.match(stream, e.pos_in_stream) + if not root_match: + raise - def set_parser_state(self, state): - self.parser_state = state + value, type_ = root_match + t = Token(type_, value, e.pos_in_stream, e.line, e.column) + raise UnexpectedToken(t, e.allowed, state=e.state) - def lex(self, stream): - l = _Lex(self.lexers[self.parser_state], self.parser_state) - for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types): - yield x - l.lexer = self.lexers[self.parser_state] - l.state = self.parser_state + + +class LexerConf(Serialize): + __serialize_fields__ = 'tokens', 'ignore' + __serialize_namespace__ = TerminalDef, + + def __init__(self, tokens, ignore=(), postlex=None, callbacks=None): + self.tokens = tokens + self.ignore = ignore + self.postlex = postlex + self.callbacks = callbacks or {} + + def _deserialize(self): + self.callbacks = {} # TODO from functools import partial, wraps @@ -1085,7 +1257,7 @@ class PropagatePositions: if isinstance(res, Tree): for c in children: - if isinstance(c, Tree) and c.children and not c.meta.empty: + if isinstance(c, Tree) and not c.meta.empty: res.meta.line = c.meta.line res.meta.column = c.meta.column res.meta.start_pos = c.meta.start_pos @@ -1099,7 +1271,7 @@ class PropagatePositions: break for c in reversed(children): - if isinstance(c, Tree) and c.children and not c.meta.empty: + if isinstance(c, Tree) and not c.meta.empty: res.meta.end_line = c.meta.end_line res.meta.end_column = c.meta.end_column res.meta.end_pos = c.meta.end_pos @@ -1108,7 +1280,7 @@ class PropagatePositions: elif isinstance(c, Token): res.meta.end_line = c.end_line res.meta.end_column = c.end_column - res.meta.end_pos = c.pos_in_stream + len(c.value) + res.meta.end_pos = c.end_pos res.meta.empty = False break @@ -1251,6 +1423,23 @@ def ptb_inline_args(func): return func(*children) return f +def inplace_transformer(func): + @wraps(func) + def f(children): + # function name in a Transformer is a rule name. 
+ tree = Tree(func.__name__, children) + return func(tree) + return f + +def apply_visit_wrapper(func, name, wrapper): + if wrapper is visitors._vargs_meta or wrapper is visitors._vargs_meta_inline: + raise NotImplementedError("Meta args not supported for internal transformer") + @wraps(func) + def f(children): + return wrapper(func, name, children, None) + return f + + class ParseTreeBuilder: def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): self.tree_class = tree_class @@ -1264,12 +1453,12 @@ class ParseTreeBuilder: def _init_builders(self, rules): for rule in rules: options = rule.options - keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) - expand_single_child = options.expand1 if options else False + keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens + expand_single_child = options.expand1 wrapper_chain = list(filter(None, [ (expand_single_child and not rule.alias) and ExpandSingleChild, - maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None), + maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), self.propagate_positions and PropagatePositions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), ])) @@ -1285,10 +1474,15 @@ class ParseTreeBuilder: user_callback_name = rule.alias or rule.origin.name try: f = getattr(transformer, user_callback_name) - assert not getattr(f, 'meta', False), "Meta args not supported for internal transformer" # XXX InlineTransformer is deprecated! 
- if getattr(f, 'inline', False) or isinstance(transformer, InlineTransformer): - f = ptb_inline_args(f) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + f = apply_visit_wrapper(f, user_callback_name, wrapper) + else: + if isinstance(transformer, InlineTransformer): + f = ptb_inline_args(f) + elif isinstance(transformer, Transformer_InPlace): + f = inplace_transformer(f) except AttributeError: f = partial(self.tree_class, user_callback_name) @@ -1307,7 +1501,7 @@ class LALR_Parser(object): def __init__(self, parser_conf, debug=False): assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" analysis = LALR_Analyzer(parser_conf, debug=debug) - analysis.compute_lookahead() + analysis.compute_lalr() callbacks = parser_conf.callbacks self._parse_table = analysis.parse_table @@ -1317,7 +1511,8 @@ class LALR_Parser(object): @classmethod def deserialize(cls, data, memo, callbacks): inst = cls.__new__(cls) - inst.parser = _Parser(IntParseTable.deserialize(data, memo), callbacks) + inst._parse_table = IntParseTable.deserialize(data, memo) + inst.parser = _Parser(inst._parse_table, callbacks) return inst def serialize(self, memo): @@ -1330,19 +1525,22 @@ class LALR_Parser(object): class _Parser: def __init__(self, parse_table, callbacks): self.states = parse_table.states - self.start_state = parse_table.start_state - self.end_state = parse_table.end_state + self.start_states = parse_table.start_states + self.end_states = parse_table.end_states self.callbacks = callbacks - def parse(self, seq, set_state=None): + def parse(self, seq, start, set_state=None): token = None stream = iter(seq) states = self.states - state_stack = [self.start_state] + start_state = self.start_states[start] + end_state = self.end_states[start] + + state_stack = [start_state] value_stack = [] - if set_state: set_state(self.start_state) + if set_state: set_state(start_state) def get_action(token): state = state_stack[-1] @@ -1372,7 +1570,7 @@ class _Parser: for token in stream: while True: action, arg = get_action(token) - assert arg != self.end_state + assert arg != end_state if action is Shift: state_stack.append(arg) @@ -1385,12 +1583,10 @@ class _Parser: token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) while True: _action, arg = get_action(token) - if _action is Shift: - assert arg == self.end_state - val ,= value_stack - return val - else: - reduce(arg) + assert(_action is Reduce) + reduce(arg) + if state_stack[-1] == end_state: + return value_stack[-1] @@ -1405,11 +1601,12 @@ class Action: Shift = Action('Shift') Reduce = Action('Reduce') + class ParseTable: - def __init__(self, states, start_state, end_state): + def __init__(self, states, start_states, end_states): self.states = states - self.start_state = start_state - self.end_state = end_state + self.start_states = start_states + self.end_states = end_states def serialize(self, memo): tokens = Enumerator() @@ -1424,8 +1621,8 @@ class ParseTable: return { 'tokens': tokens.reversed(), 'states': states, - 'start_state': self.start_state, - 'end_state': self.end_state, + 'start_states': self.start_states, + 'end_states': self.end_states, } @classmethod @@ -1436,7 +1633,7 @@ class ParseTable: for token, (action, arg) in actions.items()} for state, actions in data['states'].items() } - return cls(states, data['start_state'], data['end_state']) + return cls(states, data['start_states'], data['end_states']) class IntParseTable(ParseTable): @@ -1453,9 +1650,9 
@@ class IntParseTable(ParseTable): int_states[ state_to_idx[s] ] = la - start_state = state_to_idx[parse_table.start_state] - end_state = state_to_idx[parse_table.end_state] - return cls(int_states, start_state, end_state) + start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()} + end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()} + return cls(int_states, start_states, end_states) @@ -1491,63 +1688,84 @@ def get_frontend(parser, lexer): raise ValueError('Unknown parser: %s' % parser) +class _ParserFrontend(Serialize): + def _parse(self, input, start, *args): + if start is None: + start = self.start + if len(start) > 1: + raise ValueError("Lark initialized with more than 1 possible start rule. Must specify which start rule to parse", start) + start ,= start + return self.parser.parse(input, start, *args) -class WithLexer(Serialize): +class WithLexer(_ParserFrontend): lexer = None parser = None lexer_conf = None + start = None - __serialize_fields__ = 'parser', 'lexer' - __serialize_namespace__ = Rule, ContextualLexer, TraditionalLexer + __serialize_fields__ = 'parser', 'lexer_conf', 'start' + __serialize_namespace__ = LexerConf, + + def __init__(self, lexer_conf, parser_conf, options=None): + self.lexer_conf = lexer_conf + self.start = parser_conf.start + self.postlex = lexer_conf.postlex @classmethod def deserialize(cls, data, memo, callbacks, postlex): inst = super(WithLexer, cls).deserialize(data, memo) inst.postlex = postlex inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) + inst.init_lexer() return inst def _serialize(self, data, memo): data['parser'] = data['parser'].serialize(memo) - def init_traditional_lexer(self, lexer_conf): - self.lexer_conf = lexer_conf - self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks) - self.postlex = lexer_conf.postlex - - def init_contextual_lexer(self, lexer_conf): - self.lexer_conf = lexer_conf - self.postlex = lexer_conf.postlex - states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} - always_accept = self.postlex.always_accept if self.postlex else () - self.lexer = ContextualLexer(lexer_conf.tokens, states, - ignore=lexer_conf.ignore, - always_accept=always_accept, - user_callbacks=lexer_conf.callbacks) - - def lex(self, text): - stream = self.lexer.lex(text) + def lex(self, *args): + stream = self.lexer.lex(*args) return self.postlex.process(stream) if self.postlex else stream - def parse(self, text): + def parse(self, text, start=None): token_stream = self.lex(text) - sps = self.lexer.set_parser_state - return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else []) + return self._parse(token_stream, start) + def init_traditional_lexer(self): + self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks) -class LALR_TraditionalLexer(WithLexer): +class LALR_WithLexer(WithLexer): def __init__(self, lexer_conf, parser_conf, options=None): debug = options.debug if options else False self.parser = LALR_Parser(parser_conf, debug=debug) - self.init_traditional_lexer(lexer_conf) + WithLexer.__init__(self, lexer_conf, parser_conf, options) -class LALR_ContextualLexer(WithLexer): - def __init__(self, lexer_conf, parser_conf, options=None): - debug = options.debug if options else False - self.parser = LALR_Parser(parser_conf, debug=debug) - self.init_contextual_lexer(lexer_conf) + self.init_lexer() + + 
def init_lexer(self): + raise NotImplementedError() + +class LALR_TraditionalLexer(LALR_WithLexer): + def init_lexer(self): + self.init_traditional_lexer() + +class LALR_ContextualLexer(LALR_WithLexer): + def init_lexer(self): + states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} + always_accept = self.postlex.always_accept if self.postlex else () + self.lexer = ContextualLexer(self.lexer_conf.tokens, states, + ignore=self.lexer_conf.ignore, + always_accept=always_accept, + user_callbacks=self.lexer_conf.callbacks) + + + def parse(self, text, start=None): + parser_state = [None] + def set_parser_state(s): + parser_state[0] = s + token_stream = self.lex(text, lambda: parser_state[0]) + return self._parse(token_stream, start, set_parser_state) class LarkOptions(Serialize): @@ -1576,8 +1794,7 @@ class LarkOptions(Serialize): keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) cache_grammar - Cache the Lark grammar (Default: False) postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. - start - The start symbol (Default: start) - profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False) + start - The start symbol, either a string, or a list of strings for multiple possible starts (Default: "start") priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. @@ -1596,12 +1813,12 @@ class LarkOptions(Serialize): 'lexer': 'auto', 'transformer': None, 'start': 'start', - 'profile': False, 'priority': 'auto', 'ambiguity': 'auto', - 'propagate_positions': False, + 'propagate_positions': True, 'lexer_callbacks': {}, - 'maybe_placeholders': False, + 'maybe_placeholders': True, + 'edit_terminals': None, } def __init__(self, options_dict): @@ -1618,6 +1835,9 @@ class LarkOptions(Serialize): options[name] = value + if isinstance(options['start'], STRING_TYPE): + options['start'] = [options['start']] + self.__dict__['options'] = options assert self.parser in ('earley', 'lalr', 'cyk', None) @@ -1630,7 +1850,11 @@ class LarkOptions(Serialize): raise ValueError("Unknown options: %s" % o.keys()) def __getattr__(self, name): - return self.options[name] + try: + return self.options[name] + except KeyError as e: + raise AttributeError(e) + def __setattr__(self, name, value): assert name in self.options self.options[name] = value @@ -1643,30 +1867,6 @@ class LarkOptions(Serialize): return cls(data) -class Profiler: - def __init__(self): - self.total_time = defaultdict(float) - self.cur_section = '__init__' - self.last_enter_time = time.time() - - def enter_section(self, name): - cur_time = time.time() - self.total_time[self.cur_section] += cur_time - self.last_enter_time - self.last_enter_time = cur_time - self.cur_section = name - - def make_wrapper(self, name, f): - def wrapper(*args, **kwargs): - last_section = self.cur_section - self.enter_section(name) - try: - return f(*args, **kwargs) - finally: - self.enter_section(last_section) - - return wrapper - - class Lark(Serialize): def __init__(self, grammar, **options): """ @@ -1694,9 +1894,6 @@ class Lark(Serialize): if self.options.cache_grammar: raise NotImplementedError("Not available yet") - assert not self.options.profile, "Feature temporarily disabled" - # self.profiler 
= Profiler() if self.options.profile else None - if self.options.lexer == 'auto': if self.options.parser == 'lalr': self.options.lexer = 'contextual' @@ -1733,7 +1930,13 @@ class Lark(Serialize): self.grammar = load_grammar(grammar, self.source) # Compile the EBNF grammar into BNF - self.terminals, self.rules, self.ignore_tokens = self.grammar.compile() + self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) + + if self.options.edit_terminals: + for t in self.terminals: + self.options.edit_terminals(t) + + self._terminals_dict = {t.name:t for t in self.terminals} # If the user asked to invert the priorities, negate them all here. # This replaces the old 'resolve__antiscore_sum' option. @@ -1748,7 +1951,16 @@ class Lark(Serialize): for rule in self.rules: if rule.options.priority is not None: rule.options.priority = None - self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks) + + # TODO Deprecate lexer_callbacks? + lexer_callbacks = dict(self.options.lexer_callbacks) + if self.options.transformer: + t = self.options.transformer + for term in self.terminals: + if hasattr(t, term.name): + lexer_callbacks[term.name] = getattr(t, term.name) + + self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks) if self.options.parser: self.parser = self._build_parser() @@ -1783,6 +1995,7 @@ class Lark(Serialize): options['postlex'] = postlex inst.options = LarkOptions.deserialize(options, memo) inst.rules = [Rule.deserialize(r, memo) for r in data['rules']] + inst.source = '' inst._prepare_callbacks() inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex) return inst @@ -1819,16 +2032,25 @@ class Lark(Serialize): return self.options.postlex.process(stream) return stream - def parse(self, text): - "Parse the given text, according to the options provided. Returns a tree, unless specified otherwise." - return self.parser.parse(text) + def get_terminal(self, name): + "Get information about a terminal" + return self._terminals_dict[name] + + def parse(self, text, start=None): + """Parse the given text, according to the options provided. + + The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option). + + Returns a tree, unless specified otherwise. 
+ """ + return self.parser.parse(text, start=start) DATA = ( -{'rules': [{'@': 27}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 24}, {'@': 18}, {'@': 16}, {'@': 23}, {'@': 21}, {'@': 17}, {'@': 28}, {'@': 30}, {'@': 25}, {'@': 29}, {'@': 20}, {'@': 22}, {'@': 15}, {'@': 19}, {'@': 12}, {'@': 14}], 'parser': {'parser': {'tokens': {0: 'COMMA', 1: 'RBRACE', 2: u'pair', 3: u'ESCAPED_STRING', 4: u'string', 5: 'COLON', 6: 'RSQB', 7: '$END', 8: 'LBRACE', 9: u'FALSE', 10: u'object', 11: u'SIGNED_NUMBER', 12: u'value', 13: 'LSQB', 14: u'NULL', 15: u'TRUE', 16: u'array', 17: '__anon_star_1', 18: '__anon_star_0', 19: 'start'}, 'states': {0: {0: (0, 1), 1: (0, 32)}, 1: {2: (0, 5), 3: (0, 21), 4: (0, 3)}, 2: {0: (1, {'@': 12}), 1: (1, {'@': 12})}, 3: {5: (0, 13)}, 4: {0: (1, {'@': 13}), 1: (1, {'@': 13}), 6: (1, {'@': 13}), 7: (1, {'@': 13})}, 5: {0: (1, {'@': 14}), 1: (1, {'@': 14})}, 6: {0: (1, {'@': 15}), 6: (1, {'@': 15})}, 7: {0: (1, {'@': 16}), 1: (1, {'@': 16}), 6: (1, {'@': 16}), 7: (1, {'@': 16})}, 8: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 12), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 9: {0: (1, {'@': 17}), 1: (1, {'@': 17}), 6: (1, {'@': 17}), 7: (1, {'@': 17})}, 10: {0: (0, 22), 17: (0, 0), 1: (0, 26)}, 11: {0: (1, {'@': 18}), 1: (1, {'@': 18}), 6: (1, {'@': 18}), 7: (1, {'@': 18})}, 12: {0: (1, {'@': 19}), 6: (1, {'@': 19})}, 13: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 15), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 14: {3: (0, 21), 4: (0, 4), 6: (0, 30), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 23), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 15: {0: (1, {'@': 20}), 1: (1, {'@': 20})}, 16: {0: (1, {'@': 21}), 1: (1, {'@': 21}), 6: (1, {'@': 21}), 7: (1, {'@': 21})}, 17: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 6), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27)}, 18: {}, 19: {7: (0, 18)}, 20: {0: (0, 8), 6: (0, 16)}, 21: {0: (1, {'@': 22}), 1: (1, {'@': 22}), 5: (1, {'@': 22}), 6: (1, {'@': 22}), 7: (1, {'@': 22})}, 22: {2: (0, 2), 3: (0, 21), 4: (0, 3)}, 23: {0: (0, 17), 18: (0, 20), 6: (0, 9)}, 24: {0: (1, {'@': 23}), 1: (1, {'@': 23}), 6: (1, {'@': 23}), 7: (1, {'@': 23})}, 25: {0: (1, {'@': 24}), 1: (1, {'@': 24}), 6: (1, {'@': 24}), 7: (1, {'@': 24})}, 26: {0: (1, {'@': 25}), 1: (1, {'@': 25}), 6: (1, {'@': 25}), 7: (1, {'@': 25})}, 27: {0: (1, {'@': 26}), 1: (1, {'@': 26}), 6: (1, {'@': 26}), 7: (1, {'@': 26})}, 28: {3: (0, 21), 4: (0, 4), 8: (0, 34), 9: (0, 7), 10: (0, 33), 11: (0, 25), 12: (0, 29), 13: (0, 14), 14: (0, 24), 15: (0, 11), 16: (0, 27), 19: (0, 19)}, 29: {7: (1, {'@': 27})}, 30: {0: (1, {'@': 28}), 1: (1, {'@': 28}), 6: (1, {'@': 28}), 7: (1, {'@': 28})}, 31: {0: (1, {'@': 29}), 1: (1, {'@': 29}), 6: (1, {'@': 29}), 7: (1, {'@': 29})}, 32: {0: (1, {'@': 30}), 1: (1, {'@': 30}), 6: (1, {'@': 30}), 7: (1, {'@': 30})}, 33: {0: (1, {'@': 31}), 1: (1, {'@': 31}), 6: (1, {'@': 31}), 7: (1, {'@': 31})}, 34: {1: (0, 31), 2: (0, 10), 3: (0, 21), 4: (0, 3)}}, 'end_state': 18, 'start_state': 28}, '__type__': 'LALR_TraditionalLexer', 'lexer': {'ignore_types': [u'WS'], 'terminals': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], '__type__': 'TraditionalLexer', 'newline_types': [u'WS']}}, '__type__': 'Lark', 'options': {'profile': False, 'transformer': None, 'lexer': 'standard', 'lexer_callbacks': {}, 'postlex': None, 'parser': 'lalr', 
'cache_grammar': False, 'tree_class': None, 'priority': None, 'start': 'start', 'keep_all_tokens': False, 'ambiguity': 'auto', 'debug': False, 'propagate_positions': False, 'maybe_placeholders': False}} +{'rules': [{'@': 27}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 25}, {'@': 18}, {'@': 16}, {'@': 24}, {'@': 22}, {'@': 17}, {'@': 28}, {'@': 30}, {'@': 20}, {'@': 29}, {'@': 21}, {'@': 23}, {'@': 15}, {'@': 19}, {'@': 12}, {'@': 14}], 'parser': {'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': [u'WS'], '__type__': 'LexerConf'}, 'parser': {'tokens': {0: 'LBRACE', 1: u'FALSE', 2: u'string', 3: u'object', 4: u'NULL', 5: u'SIGNED_NUMBER', 6: u'value', 7: 'start', 8: 'LSQB', 9: u'ESCAPED_STRING', 10: u'TRUE', 11: u'array', 12: 'COMMA', 13: 'RBRACE', 14: u'pair', 15: 'COLON', 16: 'RSQB', 17: '$END', 18: '__anon_star_1', 19: '__anon_star_0'}, 'states': {0: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 28), 7: (0, 11), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 1: {12: (0, 2), 13: (0, 31)}, 2: {9: (0, 20), 2: (0, 4), 14: (0, 6)}, 3: {12: (1, {'@': 12}), 13: (1, {'@': 12})}, 4: {15: (0, 15)}, 5: {16: (1, {'@': 13}), 17: (1, {'@': 13}), 12: (1, {'@': 13}), 13: (1, {'@': 13})}, 6: {12: (1, {'@': 14}), 13: (1, {'@': 14})}, 7: {16: (1, {'@': 15}), 12: (1, {'@': 15})}, 8: {16: (1, {'@': 16}), 17: (1, {'@': 16}), 12: (1, {'@': 16}), 13: (1, {'@': 16})}, 9: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 14), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 10: {16: (1, {'@': 17}), 17: (1, {'@': 17}), 12: (1, {'@': 17}), 13: (1, {'@': 17})}, 11: {}, 12: {18: (0, 1), 12: (0, 21), 13: (0, 16)}, 13: {16: (1, {'@': 18}), 17: (1, {'@': 18}), 12: (1, {'@': 18}), 13: (1, {'@': 18})}, 14: {16: (1, {'@': 19}), 12: (1, {'@': 19})}, 15: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 17), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 16: {16: (1, {'@': 20}), 17: (1, {'@': 20}), 12: (1, {'@': 20}), 13: (1, {'@': 20})}, 17: {12: (1, {'@': 21}), 13: (1, {'@': 21})}, 18: {16: (1, {'@': 22}), 17: (1, {'@': 22}), 12: (1, {'@': 22}), 13: (1, {'@': 22})}, 19: {16: (0, 18), 12: (0, 9)}, 20: {16: (1, {'@': 23}), 17: (1, {'@': 23}), 12: (1, {'@': 23}), 13: (1, {'@': 23}), 15: (1, {'@': 23})}, 21: {9: (0, 20), 2: (0, 4), 14: (0, 3)}, 22: {16: (0, 10), 19: (0, 19), 12: (0, 27)}, 23: {16: (1, {'@': 24}), 17: (1, {'@': 24}), 12: (1, {'@': 24}), 13: (1, {'@': 24})}, 24: {16: (1, {'@': 25}), 17: (1, {'@': 25}), 12: (1, {'@': 25}), 13: (1, {'@': 25})}, 25: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 22), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26), 16: (0, 29)}, 26: {16: (1, {'@': 26}), 17: (1, {'@': 26}), 12: (1, {'@': 26}), 13: (1, {'@': 26})}, 27: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 7), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 28: {17: (1, {'@': 27})}, 29: {16: (1, {'@': 28}), 17: (1, {'@': 28}), 12: (1, {'@': 28}), 13: (1, {'@': 28})}, 30: {16: (1, {'@': 29}), 17: (1, {'@': 29}), 12: (1, {'@': 29}), 13: (1, {'@': 29})}, 31: {16: (1, {'@': 30}), 17: (1, {'@': 30}), 12: (1, {'@': 30}), 13: (1, {'@': 30})}, 32: {16: (1, {'@': 31}), 17: (1, {'@': 31}), 12: (1, {'@': 31}), 13: (1, {'@': 31})}, 33: {9: (0, 20), 2: (0, 4), 13: (0, 30), 14: (0, 12)}}, 'end_states': {'start': 11}, 'start_states': {'start': 0}}, '__type__': 
'LALR_ContextualLexer', 'start': ['start']}, '__type__': 'Lark', 'options': {'transformer': None, 'lexer': 'contextual', 'lexer_callbacks': {}, 'debug': False, 'postlex': None, 'parser': 'lalr', 'cache_grammar': False, 'tree_class': None, 'priority': None, 'start': ['start'], 'keep_all_tokens': False, 'ambiguity': 'auto', 'edit_terminals': None, 'propagate_positions': True, 'maybe_placeholders': True}} ) MEMO = ( -{0: {'priority': 1, 'pattern': {'__type__': 'PatternRE', 'flags': [], 'value': u'(?:(?:\\+|\\-))?(?:(?:(?:[0-9])+(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+|(?:(?:[0-9])+\\.(?:(?:[0-9])+)?|\\.(?:[0-9])+)(?:(?:e|E)(?:(?:\\+|\\-))?(?:[0-9])+)?)|(?:[0-9])+)'}, '__type__': 'TerminalDef', 'name': u'SIGNED_NUMBER'}, 1: {'priority': 1, 'pattern': {'__type__': 'PatternRE', 'flags': [], 'value': u'\\".*?(? 1: if dups[0].expansion: - raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" % ''.join('\n * %s' % i for i in dups)) + raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" + % ''.join('\n * %s' % i for i in dups)) # Empty rule; assert all other attributes are equal assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 4af2c24..7822485 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -202,7 +202,7 @@ class LALR_Analyzer(GrammarAnalyzer): continue s2 = rp2.next # if s2 is a terminal - if not s2 in self.lr0_rules_by_origin: + if s2 not in self.lr0_rules_by_origin: dr.add(s2) if s2 in self.NULLABLE: r.add((next_state, s2)) diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 07016ff..9934567 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -34,6 +34,9 @@ # See . 
# # + +import os +from io import open ###} import pprint diff --git a/lark/utils.py b/lark/utils.py index 9513b8b..b1354cf 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -1,4 +1,5 @@ import sys +from ast import literal_eval from collections import deque class fzset(frozenset): @@ -239,3 +240,28 @@ class Enumerator(Serialize): assert len(r) == len(self.enums) return r + +def eval_escaping(s): + w = '' + i = iter(s) + for n in i: + w += n + if n == '\\': + try: + n2 = next(i) + except StopIteration: + raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s) + if n2 == '\\': + w += '\\\\' + elif n2 not in 'uxnftr': + w += '\\' + w += n2 + w = w.replace('\\"', '"').replace("'", "\\'") + + to_eval = "u'''%s'''" % w + try: + s = literal_eval(to_eval) + except SyntaxError as e: + raise ValueError(s, e) + + return s From 5682dcc57abef5996dc9053bffc108552640055a Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 12 Jan 2020 16:03:16 +0200 Subject: [PATCH 113/132] Added python_bytecode example + Tiny bugfix --- examples/README.md | 1 + examples/python3.lark | 8 ++-- examples/python_bytecode.py | 77 +++++++++++++++++++++++++++++++++++++ lark/load_grammar.py | 2 +- 4 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 examples/python_bytecode.py diff --git a/examples/README.md b/examples/README.md index f40157d..8053ebd 100644 --- a/examples/README.md +++ b/examples/README.md @@ -27,6 +27,7 @@ For example, the following will parse all the Python files in the standard libra - [error\_reporting\_lalr.py](error_reporting_lalr.py) - A demonstration of example-driven error reporting with the LALR parser - [python\_parser.py](python_parser.py) - A fully-working Python 2 & 3 parser (but not production ready yet!) +- [python\_bytecode.py](python_bytecode.py) - A toy example showing how to compile Python directly to bytecode - [conf\_lalr.py](conf_lalr.py) - Demonstrates the power of LALR's contextual lexer on a toy configuration language - [conf\_earley.py](conf_earley.py) - Demonstrates the power of Earley's dynamic lexer on a toy configuration language - [custom\_lexer.py](custom_lexer.py) - Demonstrates using a custom lexer to parse a non-textual stream of data diff --git a/examples/python3.lark b/examples/python3.lark index 3f39f9f..78c9875 100644 --- a/examples/python3.lark +++ b/examples/python3.lark @@ -81,7 +81,7 @@ with_item: test ["as" expr] except_clause: "except" [test ["as" NAME]] suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT -?test: or_test ["if" or_test "else" test] | lambdef +?test: or_test ("if" or_test "else" test)? | lambdef ?test_nocond: or_test | lambdef_nocond lambdef: "lambda" [varargslist] ":" test lambdef_nocond: "lambda" [varargslist] ":" test_nocond @@ -107,7 +107,7 @@ star_expr: "*" expr // sake of a __future__ import described in PEP 401 (which really works :-) !_comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not" -?power: await_expr ["**" factor] +?power: await_expr ("**" factor)? ?await_expr: AWAIT? atom_expr AWAIT: "await" @@ -137,7 +137,7 @@ dictorsetmaker: ( ((test ":" test | "**" expr) (comp_for | ("," (test ":" test | classdef: "class" NAME ["(" [arguments] ")"] ":" suite -arguments: argvalue ("," argvalue)* ["," [ starargs | kwargs]] +arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])? 
| starargs | kwargs | test comp_for @@ -145,7 +145,7 @@ arguments: argvalue ("," argvalue)* ["," [ starargs | kwargs]] starargs: "*" test ("," "*" test)* ("," argvalue)* ["," kwargs] kwargs: "**" test -?argvalue: test ["=" test] +?argvalue: test ("=" test)? diff --git a/examples/python_bytecode.py b/examples/python_bytecode.py new file mode 100644 index 0000000..cbb8ccd --- /dev/null +++ b/examples/python_bytecode.py @@ -0,0 +1,77 @@ +# +# This is a toy example that compiles Python directly to bytecode, without generating an AST. +# It currently only works for very very simple Python code. +# +# It requires the 'bytecode' library. You can get it using +# +# $ pip install bytecode +# + +from lark import Lark, Transformer, v_args +from lark.indenter import Indenter + +from bytecode import Instr, Bytecode + +class PythonIndenter(Indenter): + NL_type = '_NEWLINE' + OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] + CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] + INDENT_type = '_INDENT' + DEDENT_type = '_DEDENT' + tab_len = 8 + + +@v_args(inline=True) +class Compile(Transformer): + def number(self, n): + return [Instr('LOAD_CONST', int(n))] + def string(self, s): + return [Instr('LOAD_CONST', s[1:-1])] + def var(self, n): + return [Instr('LOAD_NAME', n)] + + def arith_expr(self, a, op, b): + # TODO support chain arithmetic + assert op == '+' + return a + b + [Instr('BINARY_ADD')] + + def arguments(self, args): + return args + + def funccall(self, name, args): + return name + args + [Instr('CALL_FUNCTION', 1)] + + @v_args(inline=False) + def file_input(self, stmts): + return sum(stmts, []) + [Instr("RETURN_VALUE")] + + def expr_stmt(self, lval, rval): + # TODO more complicated than that + name ,= lval + assert name.name == 'LOAD_NAME' # XXX avoid with another layer of abstraction + return rval + [Instr("STORE_NAME", name.arg)] + + def __default__(self, *args): + assert False, args + + +python_parser3 = Lark.open('python3.lark', rel_to=__file__, start='file_input', + parser='lalr', postlex=PythonIndenter(), + transformer=Compile(), propagate_positions=False) + +def compile_python(s): + insts = python_parser3.parse(s+"\n") + return Bytecode(insts).to_code() + +code = compile_python(""" +a = 3 +b = 5 +print("Hello World!") +print(a+(b+2)) +print((a+b)+2) +""") +exec(code) +# -- Output -- +# Hello World! +# 10 +# 10 diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 83ec341..77095a8 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -501,7 +501,7 @@ class Grammar: empty_indices = [x==_EMPTY for x in expansion] if any(empty_indices): - exp_options = copy(options) + exp_options = copy(options) or RuleOptions() exp_options.empty_indices = empty_indices expansion = [x for x in expansion if x!=_EMPTY] else: From ae691bf35e9e13cdb4f718ca58c41b7e72b51953 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 16 Jan 2020 12:15:37 +0200 Subject: [PATCH 114/132] Revert propagate_positions to be False by default, still not ready for prime-time --- README.md | 6 +++--- docs/classes.md | 4 ++-- lark/lark.py | 2 +- tests/test_parser.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 84e4921..6f5ed74 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Lark provides syntax highlighting for its grammar files (\*.lark): ### Clones -- [Lerche (Julia)](https://github.com/jamesrhester/Lerche.jl) - an unofficial clone, written entirely in Julia. 
+- [Lerche (Julia)](https://github.com/jamesrhester/Lerche.jl) - an unofficial clone, written entirely in Julia. ### Hello World @@ -141,10 +141,10 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration - [synapse](https://github.com/vertexproject/synapse) - an intelligence analysis platform - [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands - - [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks + - [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks - [Torchani](https://github.com/aiqm/torchani) - Accurate Neural Network Potential on PyTorch - [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings - - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language + - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer diff --git a/docs/classes.md b/docs/classes.md index 021b2f4..284ce73 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -32,7 +32,7 @@ The Lark class accepts a grammar string or file object, and keyword options: * **keep_all_tokens** - Don't throw away any terminals from the tree (Default=`False`) -* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`True`) +* **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`False`) * **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`) @@ -52,7 +52,7 @@ The main tree class * `data` - The name of the rule or alias * `children` - List of matched sub-rules and terminals -* `meta` - Line & Column numbers (unless `propagate_positions` is disabled) +* `meta` - Line & Column numbers (if `propagate_positions` is enabled) * meta attributes: `line`, `column`, `start_pos`, `end_line`, `end_column`, `end_pos` #### \_\_init\_\_(self, data, children) diff --git a/lark/lark.py b/lark/lark.py index 33b57e3..3e69b7f 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -62,7 +62,7 @@ class LarkOptions(Serialize): 'start': 'start', 'priority': 'auto', 'ambiguity': 'auto', - 'propagate_positions': True, + 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': True, 'edit_terminals': None, diff --git a/tests/test_parser.py b/tests/test_parser.py index 3f73990..72be997 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -151,7 +151,7 @@ class TestParsers(unittest.TestCase): g = Lark(r"""start: a+ a : "x" _NL? 
_NL: /\n/+ - """, parser='lalr', transformer=T() if internal else None) + """, parser='lalr', transformer=T() if internal else None, propagate_positions=True) except NotImplementedError: assert internal continue From 4db56dc8b0ec408c3719f1e45138136730d6537c Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 16 Jan 2020 12:21:40 +0200 Subject: [PATCH 115/132] Added shebang to example script (Issue #504) --- examples/standalone/create_standalone.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/standalone/create_standalone.sh b/examples/standalone/create_standalone.sh index 141ab89..d8da6b0 100755 --- a/examples/standalone/create_standalone.sh +++ b/examples/standalone/create_standalone.sh @@ -1 +1,2 @@ +#!/bin/sh PYTHONPATH=../.. python -m lark.tools.standalone json.lark > json_parser.py From fcdba441b47b0fc86f8ce262b97645720f15f1ed Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 16 Jan 2020 12:39:08 +0200 Subject: [PATCH 116/132] Better error message for reduce/reduce conflict (Issue #135) --- lark/load_grammar.py | 1 + lark/parsers/lalr_analysis.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 77095a8..356d03d 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -478,6 +478,7 @@ class Grammar: rules = [] for name, rule_tree, options in rule_defs: ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None + ebnf_to_bnf.prefix = name tree = transformer.transform(rule_tree) res = ebnf_to_bnf.transform(tree) rules.append((name, res, options)) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 7822485..05c1ce8 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -253,10 +253,10 @@ class LALR_Analyzer(GrammarAnalyzer): actions[la] = (Shift, next_state.closure) for la, rules in state.lookaheads.items(): if len(rules) > 1: - raise GrammarError('Collision in %s: %s' % (la, ', '.join([ str(r) for r in rules ]))) + raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))) if la in actions: if self.debug: - logging.warning('Shift/reduce conflict for terminal %s: (resolving as shift)', la.name) + logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) logging.warning(' * %s', list(rules)[0]) else: actions[la] = (Reduce, list(rules)[0]) From 2f92c7b4a79298eb0e11fd87b2550ac1c7f3e73e Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 16 Jan 2020 14:50:10 +0200 Subject: [PATCH 117/132] Small addition to docs about terminal operators --- docs/grammar.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/grammar.md b/docs/grammar.md index cc518e9..c36756b 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -54,6 +54,10 @@ Literals can be one of: * `/re with flags/imulx` * Literal range: `"a".."z"`, `"1".."9"`, etc. +Terminals also support grammar operators, such as `|`, `+`, `*` and `?`. + +Terminals are a linear construct, and therefor may not contain themselves (recursion isn't allowed). + ### Priority Terminals can be assigned priority only when using a lexer (future versions may support Earley's dynamic lexing). @@ -74,7 +78,7 @@ When using a lexer (standard or contextual), it is the grammar-author's responsi IF: "if" INTEGER : /[0-9]+/ INTEGER2 : ("0".."9")+ //# Same as INTEGER -DECIMAL.2: INTEGER "." INTEGER //# Will be matched before INTEGER +DECIMAL.2: INTEGER? "." 
INTEGER //# Will be matched before INTEGER WHITESPACE: (" " | /\t/ )+ SQL_SELECT: "select"i ``` From 182385d7b7bedfb4b530c225cda5841b67ec2b3d Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 19 Jan 2020 17:14:24 +0200 Subject: [PATCH 118/132] Removed bad syntax: *? and +? no longer accepted by the grammar parser (Issue #511) --- examples/lark.lark | 2 +- lark/load_grammar.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/lark.lark b/examples/lark.lark index 915cf2e..f1f42f6 100644 --- a/examples/lark.lark +++ b/examples/lark.lark @@ -33,7 +33,7 @@ name: RULE | TOKEN _VBAR: _NL? "|" -OP: /[+*][?]?|[?](?![a-z])/ +OP: /[+*]|[?](?![a-z])/ RULE: /!?[_?]?[a-z][_a-z0-9]*/ TOKEN: /_?[A-Z][_A-Z0-9]*/ STRING: _STRING "i"? diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 356d03d..d57301b 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -73,7 +73,7 @@ TERMINALS = { '_RPAR': r'\)', '_LBRA': r'\[', '_RBRA': r'\]', - 'OP': '[+*][?]?|[?](?![a-z])', + 'OP': '[+*]|[?](?![a-z])', '_COLON': ':', '_COMMA': ',', '_OR': r'\|', From ec67938933d087b048da196981e1f0c19080bae9 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sun, 19 Jan 2020 21:01:53 +0100 Subject: [PATCH 119/132] Small correction in lark.lark Added multi-name-imports to the lark.lark grammar. --- examples/lark.lark | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/lark.lark b/examples/lark.lark index f1f42f6..a561361 100644 --- a/examples/lark.lark +++ b/examples/lark.lark @@ -13,7 +13,8 @@ statement: "%ignore" expansions _NL -> ignore | "%import" import_args ["->" name] _NL -> import | "%declare" name+ -> declare -import_args: "."? name ("." name)* +import_args: import_path ["(" name ("," name)* ")"] +import_path: "."? name ("." name)* ?expansions: alias (_VBAR alias)* From 9552f001b2749e833db9e00c225323937245d632 Mon Sep 17 00:00:00 2001 From: MegaIng Date: Sun, 19 Jan 2020 21:11:09 +0100 Subject: [PATCH 120/132] Update lark.lark --- examples/lark.lark | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/lark.lark b/examples/lark.lark index a561361..c99d528 100644 --- a/examples/lark.lark +++ b/examples/lark.lark @@ -10,11 +10,12 @@ token: TOKEN priority? ":" expansions _NL priority: "." NUMBER statement: "%ignore" expansions _NL -> ignore - | "%import" import_args ["->" name] _NL -> import + | "%import" import_path ["->" name] _NL -> import + | "%import" import_path name_list _NL -> multi_import | "%declare" name+ -> declare -import_args: import_path ["(" name ("," name)* ")"] -import_path: "."? name ("." name)* +!import_path: "."? name ("." 
name)* +name_list: "(" name ("," name)* ")" ?expansions: alias (_VBAR alias)* From a7c9025858e57bde560e64a1a01c7166cf99259b Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 20 Jan 2020 12:20:38 +0200 Subject: [PATCH 121/132] Added 2 projects to 'Projects using Lark' --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6f5ed74..920d047 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,8 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration - [synapse](https://github.com/vertexproject/synapse) - an intelligence analysis platform - [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands + - [Hyperledger Fabric Python SDK](https://github.com/hyperledger/fabric-sdk-py) - Fabric-SDK-Py is an implementation of the Hyperledger fabric SDK with Python 3.x + - [Datacube-core](https://github.com/opendatacube/datacube-core) - Open Data Cube analyses continental scale Earth Observation data through time - [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks - [Torchani](https://github.com/aiqm/torchani) - Accurate Neural Network Potential on PyTorch - [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings From f1b07e0571a6fdf38b9703f2ce7cc9c49f717644 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Mon, 20 Jan 2020 12:22:56 +0200 Subject: [PATCH 122/132] Small correction to 'Projects using Lark' --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 920d047..e6a52b9 100644 --- a/README.md +++ b/README.md @@ -140,11 +140,11 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail - [Hypothesis](https://github.com/HypothesisWorks/hypothesis) - Library for property-based testing - [mappyfile](https://github.com/geographika/mappyfile) - a MapFile parser for working with MapServer configuration - [synapse](https://github.com/vertexproject/synapse) - an intelligence analysis platform - - [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands - - [Hyperledger Fabric Python SDK](https://github.com/hyperledger/fabric-sdk-py) - Fabric-SDK-Py is an implementation of the Hyperledger fabric SDK with Python 3.x - [Datacube-core](https://github.com/opendatacube/datacube-core) - Open Data Cube analyses continental scale Earth Observation data through time - [SPFlow](https://github.com/SPFlow/SPFlow) - Library for Sum-Product Networks - [Torchani](https://github.com/aiqm/torchani) - Accurate Neural Network Potential on PyTorch + - [Command-Block-Assembly](https://github.com/simon816/Command-Block-Assembly) - An assembly language, and C compiler, for Minecraft commands + - [Fabric-SDK-Py](https://github.com/hyperledger/fabric-sdk-py) - Hyperledger fabric SDK with Python 3.x - [required](https://github.com/shezadkhan137/required) - multi-field validation using docstrings - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer From 52e510780a46b3240524cafdb2a5f0057580cebd Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 22 Jan 2020 12:58:03 +0200 Subject: [PATCH 
123/132] Small stuff --- lark/reconstruct.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index bd7b6a0..1e3adc7 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -28,8 +28,6 @@ class WriteTokensTransformer(Transformer_InPlace): self.term_subs = term_subs def __default__(self, data, children, meta): - # if not isinstance(t, MatchTree): - # return t if not getattr(meta, 'match_tree', False): return Tree(data, children) @@ -97,11 +95,10 @@ class Reconstructor: self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs) self.rules = list(self._build_recons_rules(rules)) self.rules.reverse() - # print(len(self.rules)) + + # Choose the best rule from each group of {rule => [rule.alias]}, since we only really need one derivation. self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion)) - # print(len(self.rules)) - # self.rules = list(set(list(self._build_recons_rules(rules)))) self.rules.sort(key=lambda r: len(r.expansion)) callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias? self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start), From 94dd3646d4147d6a91f20f517565dabe3378e930 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 22 Jan 2020 15:07:01 +0200 Subject: [PATCH 124/132] Extend comments in rules tests --- tests/test_parser.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 4b3263d..7edfd3a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -106,9 +106,12 @@ class TestParsers(unittest.TestCase): def test_comment_in_rule_definition(self): g = Lark("""start: a a: "a" - // A comment - // Another + // A comment + // Another comment | "b" + // Still more + + c: "unrelated" """) r = g.parse('b') self.assertEqual( r.children[0].data, "a" ) From 3688b0053b143c9e0717ecd3739b476e8c6ad0fc Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 22 Jan 2020 15:13:12 +0200 Subject: [PATCH 125/132] Disallow '. .' for '..' 
syntax (Issue #513) --- lark/load_grammar.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index ba26fe2..051f8cd 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -77,7 +77,8 @@ TERMINALS = { '_COLON': ':', '_COMMA': ',', '_OR': r'\|', - '_DOT': r'\.', + '_DOT': r'\.(?!\.)', + '_DOTDOT': r'\.\.', 'TILDE': '~', 'RULE': '!?[_?]?[a-z][_a-z0-9]*', 'TERMINAL': '_?[A-Z][_A-Z0-9]*', @@ -112,7 +113,7 @@ RULES = { '?expr': ['atom', 'atom OP', 'atom TILDE NUMBER', - 'atom TILDE NUMBER _DOT _DOT NUMBER', + 'atom TILDE NUMBER _DOTDOT NUMBER', ], '?atom': ['_LPAR expansions _RPAR', @@ -130,7 +131,7 @@ RULES = { '?name': ['RULE', 'TERMINAL'], 'maybe': ['_LBRA expansions _RBRA'], - 'range': ['STRING _DOT _DOT STRING'], + 'range': ['STRING _DOTDOT STRING'], 'term': ['TERMINAL _COLON expansions _NL', 'TERMINAL _DOT NUMBER _COLON expansions _NL'], From 73427f785c767dbb06fb538f512602324a7d1cea Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 22 Jan 2020 15:27:05 +0200 Subject: [PATCH 126/132] Version bump (release 0.8) --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index 0906eb7..c2cf65e 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.8.0rc1" +__version__ = "0.8.0" From de1f619fcd6dd27c5284b718ad981c7bfcd39608 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Wed, 22 Jan 2020 22:43:01 +0200 Subject: [PATCH 127/132] Bugfix in stand-alone parser (Issue #514) --- examples/standalone/json_parser.py | 10 +++++----- lark/parse_tree_builder.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/standalone/json_parser.py b/examples/standalone/json_parser.py index f270ade..04fd6ac 100644 --- a/examples/standalone/json_parser.py +++ b/examples/standalone/json_parser.py @@ -1,4 +1,4 @@ -# The file was automatically generated by Lark v0.8.0rc1 +# The file was automatically generated by Lark v0.8.0 # # # Lark Stand-alone Generator Tool @@ -1432,7 +1432,7 @@ def inplace_transformer(func): return f def apply_visit_wrapper(func, name, wrapper): - if wrapper is visitors._vargs_meta or wrapper is visitors._vargs_meta_inline: + if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: raise NotImplementedError("Meta args not supported for internal transformer") @wraps(func) def f(children): @@ -1815,7 +1815,7 @@ class LarkOptions(Serialize): 'start': 'start', 'priority': 'auto', 'ambiguity': 'auto', - 'propagate_positions': True, + 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': True, 'edit_terminals': None, @@ -2047,10 +2047,10 @@ class Lark(Serialize): DATA = ( -{'rules': [{'@': 27}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 25}, {'@': 18}, {'@': 16}, {'@': 24}, {'@': 22}, {'@': 17}, {'@': 28}, {'@': 30}, {'@': 20}, {'@': 29}, {'@': 21}, {'@': 23}, {'@': 15}, {'@': 19}, {'@': 12}, {'@': 14}], 'parser': {'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': [u'WS'], '__type__': 'LexerConf'}, 'parser': {'tokens': {0: 'LBRACE', 1: u'FALSE', 2: u'string', 3: u'object', 4: u'NULL', 5: u'SIGNED_NUMBER', 6: u'value', 7: 'start', 8: 'LSQB', 9: u'ESCAPED_STRING', 10: u'TRUE', 11: u'array', 12: 'COMMA', 13: 'RBRACE', 14: u'pair', 15: 'COLON', 16: 'RSQB', 17: 
'$END', 18: '__anon_star_1', 19: '__anon_star_0'}, 'states': {0: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 28), 7: (0, 11), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 1: {12: (0, 2), 13: (0, 31)}, 2: {9: (0, 20), 2: (0, 4), 14: (0, 6)}, 3: {12: (1, {'@': 12}), 13: (1, {'@': 12})}, 4: {15: (0, 15)}, 5: {16: (1, {'@': 13}), 17: (1, {'@': 13}), 12: (1, {'@': 13}), 13: (1, {'@': 13})}, 6: {12: (1, {'@': 14}), 13: (1, {'@': 14})}, 7: {16: (1, {'@': 15}), 12: (1, {'@': 15})}, 8: {16: (1, {'@': 16}), 17: (1, {'@': 16}), 12: (1, {'@': 16}), 13: (1, {'@': 16})}, 9: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 14), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 10: {16: (1, {'@': 17}), 17: (1, {'@': 17}), 12: (1, {'@': 17}), 13: (1, {'@': 17})}, 11: {}, 12: {18: (0, 1), 12: (0, 21), 13: (0, 16)}, 13: {16: (1, {'@': 18}), 17: (1, {'@': 18}), 12: (1, {'@': 18}), 13: (1, {'@': 18})}, 14: {16: (1, {'@': 19}), 12: (1, {'@': 19})}, 15: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 17), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 16: {16: (1, {'@': 20}), 17: (1, {'@': 20}), 12: (1, {'@': 20}), 13: (1, {'@': 20})}, 17: {12: (1, {'@': 21}), 13: (1, {'@': 21})}, 18: {16: (1, {'@': 22}), 17: (1, {'@': 22}), 12: (1, {'@': 22}), 13: (1, {'@': 22})}, 19: {16: (0, 18), 12: (0, 9)}, 20: {16: (1, {'@': 23}), 17: (1, {'@': 23}), 12: (1, {'@': 23}), 13: (1, {'@': 23}), 15: (1, {'@': 23})}, 21: {9: (0, 20), 2: (0, 4), 14: (0, 3)}, 22: {16: (0, 10), 19: (0, 19), 12: (0, 27)}, 23: {16: (1, {'@': 24}), 17: (1, {'@': 24}), 12: (1, {'@': 24}), 13: (1, {'@': 24})}, 24: {16: (1, {'@': 25}), 17: (1, {'@': 25}), 12: (1, {'@': 25}), 13: (1, {'@': 25})}, 25: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 22), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26), 16: (0, 29)}, 26: {16: (1, {'@': 26}), 17: (1, {'@': 26}), 12: (1, {'@': 26}), 13: (1, {'@': 26})}, 27: {0: (0, 33), 1: (0, 8), 2: (0, 5), 3: (0, 32), 4: (0, 23), 5: (0, 24), 6: (0, 7), 8: (0, 25), 9: (0, 20), 10: (0, 13), 11: (0, 26)}, 28: {17: (1, {'@': 27})}, 29: {16: (1, {'@': 28}), 17: (1, {'@': 28}), 12: (1, {'@': 28}), 13: (1, {'@': 28})}, 30: {16: (1, {'@': 29}), 17: (1, {'@': 29}), 12: (1, {'@': 29}), 13: (1, {'@': 29})}, 31: {16: (1, {'@': 30}), 17: (1, {'@': 30}), 12: (1, {'@': 30}), 13: (1, {'@': 30})}, 32: {16: (1, {'@': 31}), 17: (1, {'@': 31}), 12: (1, {'@': 31}), 13: (1, {'@': 31})}, 33: {9: (0, 20), 2: (0, 4), 13: (0, 30), 14: (0, 12)}}, 'end_states': {'start': 11}, 'start_states': {'start': 0}}, '__type__': 'LALR_ContextualLexer', 'start': ['start']}, '__type__': 'Lark', 'options': {'transformer': None, 'lexer': 'contextual', 'lexer_callbacks': {}, 'debug': False, 'postlex': None, 'parser': 'lalr', 'cache_grammar': False, 'tree_class': None, 'priority': None, 'start': ['start'], 'keep_all_tokens': False, 'ambiguity': 'auto', 'edit_terminals': None, 'propagate_positions': True, 'maybe_placeholders': True}} +{'rules': [{'@': 26}, {'@': 30}, {'@': 25}, {'@': 31}, {'@': 23}, {'@': 19}, {'@': 14}, {'@': 22}, {'@': 27}, {'@': 16}, {'@': 28}, {'@': 12}, {'@': 24}, {'@': 29}, {'@': 20}, {'@': 21}, {'@': 15}, {'@': 13}, {'@': 17}, {'@': 18}], 'parser': {'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': [u'WS'], '__type__': 'LexerConf'}, 'parser': {'tokens': {0: 'RSQB', 1: 'COMMA', 2: 'RBRACE', 3: '$END', 4: 
u'__array_star_0', 5: 'COLON', 6: u'pair', 7: u'ESCAPED_STRING', 8: u'string', 9: 'LBRACE', 10: u'FALSE', 11: u'object', 12: u'NULL', 13: u'SIGNED_NUMBER', 14: u'value', 15: u'array', 16: u'TRUE', 17: 'LSQB', 18: u'__object_star_1', 19: 'start'}, 'states': {0: {0: (1, {'@': 12}), 1: (1, {'@': 12}), 2: (1, {'@': 12}), 3: (1, {'@': 12})}, 1: {0: (0, 11), 1: (0, 20), 4: (0, 17)}, 2: {1: (0, 23), 2: (0, 0)}, 3: {5: (0, 12)}, 4: {8: (0, 3), 6: (0, 13), 7: (0, 21)}, 5: {8: (0, 3), 2: (0, 30), 6: (0, 19), 7: (0, 21)}, 6: {0: (0, 29), 7: (0, 21), 8: (0, 33), 9: (0, 5), 10: (0, 8), 11: (0, 31), 12: (0, 22), 13: (0, 24), 14: (0, 1), 15: (0, 26), 16: (0, 16), 17: (0, 6)}, 7: {0: (1, {'@': 13}), 1: (1, {'@': 13})}, 8: {0: (1, {'@': 14}), 1: (1, {'@': 14}), 2: (1, {'@': 14}), 3: (1, {'@': 14})}, 9: {0: (1, {'@': 15}), 1: (1, {'@': 15})}, 10: {7: (0, 21), 8: (0, 33), 9: (0, 5), 10: (0, 8), 11: (0, 31), 12: (0, 22), 13: (0, 24), 14: (0, 7), 15: (0, 26), 16: (0, 16), 17: (0, 6)}, 11: {0: (1, {'@': 16}), 1: (1, {'@': 16}), 2: (1, {'@': 16}), 3: (1, {'@': 16})}, 12: {7: (0, 21), 8: (0, 33), 9: (0, 5), 10: (0, 8), 11: (0, 31), 12: (0, 22), 13: (0, 24), 14: (0, 18), 15: (0, 26), 16: (0, 16), 17: (0, 6)}, 13: {1: (1, {'@': 17}), 2: (1, {'@': 17})}, 14: {}, 15: {1: (1, {'@': 18}), 2: (1, {'@': 18})}, 16: {0: (1, {'@': 19}), 1: (1, {'@': 19}), 2: (1, {'@': 19}), 3: (1, {'@': 19})}, 17: {0: (0, 28), 1: (0, 10)}, 18: {1: (1, {'@': 20}), 2: (1, {'@': 20})}, 19: {1: (0, 4), 18: (0, 2), 2: (0, 25)}, 20: {7: (0, 21), 8: (0, 33), 9: (0, 5), 10: (0, 8), 11: (0, 31), 12: (0, 22), 13: (0, 24), 14: (0, 9), 15: (0, 26), 16: (0, 16), 17: (0, 6)}, 21: {0: (1, {'@': 21}), 1: (1, {'@': 21}), 2: (1, {'@': 21}), 3: (1, {'@': 21}), 5: (1, {'@': 21})}, 22: {0: (1, {'@': 22}), 1: (1, {'@': 22}), 2: (1, {'@': 22}), 3: (1, {'@': 22})}, 23: {8: (0, 3), 6: (0, 15), 7: (0, 21)}, 24: {0: (1, {'@': 23}), 1: (1, {'@': 23}), 2: (1, {'@': 23}), 3: (1, {'@': 23})}, 25: {0: (1, {'@': 24}), 1: (1, {'@': 24}), 2: (1, {'@': 24}), 3: (1, {'@': 24})}, 26: {0: (1, {'@': 25}), 1: (1, {'@': 25}), 2: (1, {'@': 25}), 3: (1, {'@': 25})}, 27: {3: (1, {'@': 26})}, 28: {0: (1, {'@': 27}), 1: (1, {'@': 27}), 2: (1, {'@': 27}), 3: (1, {'@': 27})}, 29: {0: (1, {'@': 28}), 1: (1, {'@': 28}), 2: (1, {'@': 28}), 3: (1, {'@': 28})}, 30: {0: (1, {'@': 29}), 1: (1, {'@': 29}), 2: (1, {'@': 29}), 3: (1, {'@': 29})}, 31: {0: (1, {'@': 30}), 1: (1, {'@': 30}), 2: (1, {'@': 30}), 3: (1, {'@': 30})}, 32: {7: (0, 21), 8: (0, 33), 9: (0, 5), 10: (0, 8), 11: (0, 31), 12: (0, 22), 13: (0, 24), 14: (0, 27), 15: (0, 26), 16: (0, 16), 17: (0, 6), 19: (0, 14)}, 33: {0: (1, {'@': 31}), 1: (1, {'@': 31}), 2: (1, {'@': 31}), 3: (1, {'@': 31})}}, 'end_states': {'start': 14}, 'start_states': {'start': 32}}, '__type__': 'LALR_ContextualLexer', 'start': ['start']}, '__type__': 'Lark', 'options': {'transformer': None, 'lexer': 'contextual', 'lexer_callbacks': {}, 'debug': False, 'postlex': None, 'parser': 'lalr', 'cache_grammar': False, 'tree_class': None, 'priority': None, 'start': ['start'], 'keep_all_tokens': False, 'ambiguity': 'auto', 'edit_terminals': None, 'propagate_positions': False, 'maybe_placeholders': True}} ) MEMO = ( -{0: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [2, 4294967295], 'flags': [], 'value': u'\\".*?(? Date: Thu, 23 Jan 2020 01:11:28 +0200 Subject: [PATCH 128/132] Revert maybe_placeholders to be False by default.. 
It should be changed in a major release, not 0.8 (Issue #515) --- lark/lark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/lark.py b/lark/lark.py index 3e69b7f..01eca80 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -64,7 +64,7 @@ class LarkOptions(Serialize): 'ambiguity': 'auto', 'propagate_positions': False, 'lexer_callbacks': {}, - 'maybe_placeholders': True, + 'maybe_placeholders': False, 'edit_terminals': None, } From 5346231e14d31ab5bbc3cbc014a31b405d40ef39 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 23 Jan 2020 01:12:12 +0200 Subject: [PATCH 129/132] Version bump --- lark/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/__init__.py b/lark/__init__.py index c2cf65e..1a00c9d 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une from .lexer import Token from .lark import Lark -__version__ = "0.8.0" +__version__ = "0.8.1" From 3995ad913afab34b8ebc6110a45c0d90d23ca777 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 28 Jan 2020 16:16:48 +0200 Subject: [PATCH 130/132] Tiny tiny cleanup --- lark/parsers/lalr_analysis.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 05c1ce8..8890c3c 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -262,23 +262,23 @@ class LALR_Analyzer(GrammarAnalyzer): actions[la] = (Reduce, list(rules)[0]) m[state] = { k.name: v for k, v in actions.items() } - self.states = { k.closure: v for k, v in m.items() } + states = { k.closure: v for k, v in m.items() } # compute end states end_states = {} - for state in self.states: + for state in states: for rp in state: for start in self.lr0_start_states: if rp.rule.origin.name == ('$root_' + start) and rp.is_satisfied: assert(not start in end_states) end_states[start] = state - self._parse_table = ParseTable(self.states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states) + _parse_table = ParseTable(states, { start: state.closure for start, state in self.lr0_start_states.items() }, end_states) if self.debug: - self.parse_table = self._parse_table + self.parse_table = _parse_table else: - self.parse_table = IntParseTable.from_ParseTable(self._parse_table) + self.parse_table = IntParseTable.from_ParseTable(_parse_table) def compute_lalr(self): self.compute_lr0_states() From 93976e360ecc1470ae0653cff92e02ec36966f92 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 1 Feb 2020 09:14:07 +0200 Subject: [PATCH 131/132] Fixed docs for maybe_placeholders --- docs/classes.md | 2 +- docs/tree_construction.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/classes.md b/docs/classes.md index 284ce73..fd9ee3d 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -34,7 +34,7 @@ The Lark class accepts a grammar string or file object, and keyword options: * **propagate_positions** - Propagate line/column count to tree nodes, at the cost of performance (default=`False`) -* **maybe_placeholders** - The `[]` operator returns `None` when not matched. Setting this to `False` makes it behave like the `?` operator, and return no value at all, which may be a little faster (default=`True`) +* **maybe_placeholders** - When True, the `[]` operator returns `None` when not matched. 
When `False`, `[]` behaves like the `?` operator, and return no value at all, which may be a little faster (default=`False`) * **lexer_callbacks** - A dictionary of callbacks of type f(Token) -> Token, used to interface with the lexer Token generation. Only works with the standard and contextual lexers. See [Recipes](recipes.md) for more information. diff --git a/docs/tree_construction.md b/docs/tree_construction.md index 9e61d4d..a4d6088 100644 --- a/docs/tree_construction.md +++ b/docs/tree_construction.md @@ -9,7 +9,9 @@ Using `item+` or `item*` will result in a list of items, equivalent to writing ` Using `item?` will return the item if it matched, or nothing. -Using `[item]` will return the item if it matched, or the value `None`, if it didn't. It's possible to force `[]` to behave like `()?`, by using the `maybe_placeholders=False` option when initializing Lark. +If `maybe_placeholders=False` (the default), then `[]` behaves like `()?`. + +If `maybe_placeholders=True`, then using `[item]` will return the item if it matched, or the value `None`, if it didn't. ### Terminals From a55b7155b51418444f856f55c31abee7a688380f Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 8 Feb 2020 05:37:45 +0200 Subject: [PATCH 132/132] Added support for v_args in Interpreter (Issue #520) --- lark/visitors.py | 57 ++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/lark/visitors.py b/lark/visitors.py index da6b1d5..30a2a65 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -13,7 +13,31 @@ class Discard(Exception): # Transformers -class Transformer: +class _Decoratable: + @classmethod + def _apply_decorator(cls, decorator, **kwargs): + mro = getmro(cls) + assert mro[0] is cls + libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} + for name, value in getmembers(cls): + + # Make sure the function isn't inherited (unless it's overwritten) + if name.startswith('_') or (name in libmembers and name not in cls.__dict__): + continue + if not callable(cls.__dict__[name]): + continue + + # Skip if v_args already applied (at the function level) + if hasattr(cls.__dict__[name], 'vargs_applied'): + continue + + static = isinstance(cls.__dict__[name], (staticmethod, classmethod)) + setattr(cls, name, decorator(value, static=static, **kwargs)) + return cls + + + +class Transformer(_Decoratable): """Visits the tree recursively, starting with the leaves and finally the root (bottom-up) Calls its methods (provided by user via inheritance) according to tree.data @@ -90,27 +114,6 @@ class Transformer: return token - @classmethod - def _apply_decorator(cls, decorator, **kwargs): - mro = getmro(cls) - assert mro[0] is cls - libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} - for name, value in getmembers(cls): - - # Make sure the function isn't inherited (unless it's overwritten) - if name.startswith('_') or (name in libmembers and name not in cls.__dict__): - continue - if not callable(cls.__dict__[name]): - continue - - # Skip if v_args already applied (at the function level) - if hasattr(cls.__dict__[name], 'vargs_applied'): - continue - - static = isinstance(cls.__dict__[name], (staticmethod, classmethod)) - setattr(cls, name, decorator(value, static=static, **kwargs)) - return cls - class InlineTransformer(Transformer): # XXX Deprecated def _call_userfunc(self, tree, new_children=None): @@ -221,7 +224,7 @@ def visit_children_decor(func): return inner -class Interpreter: +class Interpreter(_Decoratable): """Top-down 
visitor, recursive Visits the tree, starting with the root and finally the leaves (top-down) @@ -230,8 +233,14 @@ class Interpreter: Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches. The user has to explicitly call visit_children, or use the @visit_children_decor """ + def visit(self, tree): - return getattr(self, tree.data)(tree) + f = getattr(self, tree.data) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, tree.children, tree.meta) + else: + return f(tree) def visit_children(self, tree): return [self.visit(child) if isinstance(child, Tree) else child
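# A minimal usage sketch of the combination enabled by PATCH 132/132 (v_args applied to an
# Interpreter subclass, dispatched through `visit_wrapper`). This is not code from the patches
# above; the grammar, class and rule names are hypothetical, assuming lark >= 0.8.1.

from lark import Lark, v_args
from lark.visitors import Interpreter

_GRAMMAR = r"""
    start: add
    add: NUMBER "+" NUMBER
    %import common.NUMBER
    %import common.WS
    %ignore WS
"""

@v_args(inline=True)
class _Calc(Interpreter):
    def add(self, a, b):
        # With inline=True, the rule's children arrive as positional arguments
        return int(a) + int(b)

    def start(self, add_tree):
        # Interpreter is top-down: sub-branches must be visited explicitly
        return self.visit(add_tree)

if __name__ == '__main__':
    tree = Lark(_GRAMMAR, parser='lalr').parse("1 + 2")
    print(_Calc().visit(tree))  # expected output: 3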