From 5c8a25c7333ea685d5816dea186f0cf389d6d7f5 Mon Sep 17 00:00:00 2001 From: pwwang Date: Tue, 30 Jun 2020 18:18:49 -0500 Subject: [PATCH 01/25] Avoid using root logger --- docs/how_to_use.md | 7 ++-- lark/__init__.py | 1 + lark/common.py | 7 ++++ lark/lark.py | 8 ++-- lark/parsers/earley.py | 4 +- lark/parsers/lalr_analysis.py | 6 +-- tests/__main__.py | 7 +++- tests/test_logger.py | 65 ++++++++++++++++++++++++++++++ tests/test_nearley/test_nearley.py | 7 ++-- tests/test_parser.py | 3 +- 10 files changed, 97 insertions(+), 18 deletions(-) create mode 100644 tests/test_logger.py diff --git a/docs/how_to_use.md b/docs/how_to_use.md index 886b440..78f4df2 100644 --- a/docs/how_to_use.md +++ b/docs/how_to_use.md @@ -30,12 +30,13 @@ Use the reference pages for more in-depth explanations. (links in the [main page ## LALR usage -By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure `logging` framework beforehand. For example: +By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `LOGGER` beforehand. For example: ```python -from lark import Lark import logging -logging.basicConfig(level=logging.DEBUG) +from lark import Lark, LOGGER + +LOGGER.setLevel(logging.DEBUG) collision_grammar = ''' start: as as diff --git a/lark/__init__.py b/lark/__init__.py index 9e50691..e4c54dd 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -1,3 +1,4 @@ +from .common import LOGGER from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard from .visitors import InlineTransformer, inline_args # XXX Deprecated diff --git a/lark/common.py b/lark/common.py index c44f9ce..aac9d75 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,6 +1,13 @@ +import logging from .utils import Serialize from .lexer import TerminalDef +LOGGER = logging.getLogger("LARK") +LOGGER.addHandler(logging.StreamHandler()) +# Set to highest level, since we have some warnings amongst the code +# By default, we should not output any log messages +LOGGER.setLevel(logging.CRITICAL) + ###{standalone class LexerConf(Serialize): diff --git a/lark/lark.py b/lark/lark.py index 2b783cb..8df2b87 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -1,13 +1,13 @@ from __future__ import absolute_import -import sys, os, pickle, hashlib, logging +import sys, os, pickle, hashlib from io import open from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS from .load_grammar import load_grammar from .tree import Tree -from .common import LexerConf, ParserConf +from .common import LexerConf, ParserConf, LOGGER from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken from .parse_tree_builder import ParseTreeBuilder @@ -205,7 +205,7 @@ class Lark(Serialize): cache_fn = '.lark_cache_%s.tmp' % md5 if FS.exists(cache_fn): - logging.debug('Loading grammar from cache: %s', cache_fn) + LOGGER.debug('Loading grammar from cache: %s', cache_fn) with FS.open(cache_fn, 'rb') as f: self._load(f, self.options.transformer, self.options.postlex) return @@ -284,7 +284,7 @@ class Lark(Serialize): self.lexer = self._build_lexer() if cache_fn: - logging.debug('Saving grammar to cache: %s', cache_fn) + LOGGER.debug('Saving grammar to cache: %s', cache_fn) with FS.open(cache_fn, 'wb') as f: self.save(f) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 59e9a06..5fc7531 100644 --- a/lark/parsers/earley.py +++ 
b/lark/parsers/earley.py @@ -10,11 +10,11 @@ is better documented here: http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ """ -import logging from collections import deque from ..visitors import Transformer_InPlace, v_args from ..exceptions import UnexpectedEOF, UnexpectedToken +from ..common import LOGGER from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item, TransitiveItem @@ -301,7 +301,7 @@ class Parser: try: debug_walker = ForestToPyDotVisitor() except ImportError: - logging.warning("Cannot find dependency 'pydot', will not generate sppf debug image") + LOGGER.warning("Cannot find dependency 'pydot', will not generate sppf debug image") else: debug_walker.visit(solutions[0], "sppf.png") diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 8890c3c..6fefa4c 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -6,11 +6,11 @@ For now, shift/reduce conflicts are automatically resolved as shifts. # Author: Erez Shinan (2017) # Email : erezshin@gmail.com -import logging from collections import defaultdict, deque from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator from ..exceptions import GrammarError +from ..common import LOGGER from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet from ..grammar import Rule @@ -256,8 +256,8 @@ class LALR_Analyzer(GrammarAnalyzer): raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))) if la in actions: if self.debug: - logging.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) - logging.warning(' * %s', list(rules)[0]) + LOGGER.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name) + LOGGER.warning(' * %s', list(rules)[0]) else: actions[la] = (Reduce, list(rules)[0]) m[state] = { k.name: v for k, v in actions.items() } diff --git a/tests/__main__.py b/tests/__main__.py index cb26eb4..1807aa8 100644 --- a/tests/__main__.py +++ b/tests/__main__.py @@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function import unittest import logging +from lark import LOGGER from .test_trees import TestTrees from .test_tools import TestStandalone @@ -11,11 +12,13 @@ from .test_reconstructor import TestReconstructor try: from .test_nearley.test_nearley import TestNearley except ImportError: - logging.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") + LOGGER.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") # from .test_selectors import TestSelectors # from .test_grammars import TestPythonG, TestConfigG +from .test_logger import TestLogger + from .test_parser import ( TestLalrStandard, TestEarleyStandard, @@ -31,7 +34,7 @@ from .test_parser import ( TestParsers, ) -logging.basicConfig(level=logging.INFO) +LOGGER.setLevel(logging.INFO) if __name__ == '__main__': unittest.main() diff --git a/tests/test_logger.py b/tests/test_logger.py new file mode 100644 index 0000000..dd6beb3 --- /dev/null +++ b/tests/test_logger.py @@ -0,0 +1,65 @@ +import logging +from contextlib import contextmanager +from lark import Lark, LOGGER +from unittest import TestCase, main + +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +@contextmanager +def capture_log(): + stream = StringIO() + orig_handler = LOGGER.handlers[0] + del LOGGER.handlers[:] + 
LOGGER.addHandler(logging.StreamHandler(stream))
+    yield stream
+    del LOGGER.handlers[:]
+    LOGGER.addHandler(orig_handler)
+
+class TestLogger(TestCase):
+
+    def test_debug(self):
+        LOGGER.setLevel(logging.DEBUG)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=True)
+
+        log = log.getvalue()
+        self.assertIn("Shift/Reduce conflict for terminal", log)
+        self.assertIn("A: (resolving as shift)", log)
+        self.assertIn("Shift/Reduce conflict for terminal A: (resolving as shift)", log)
+
+    def test_non_debug(self):
+        LOGGER.setLevel(logging.DEBUG)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=False)
+        log = log.getvalue()
+        # no log message
+        self.assertEqual(len(log), 0)
+
+    def test_loglevel_higher(self):
+        LOGGER.setLevel(logging.ERROR)
+        collision_grammar = '''
+        start: as as
+        as: a*
+        a: "a"
+        '''
+        with capture_log() as log:
+            Lark(collision_grammar, parser='lalr', debug=True)
+        log = log.getvalue()
+        # no log message
+        self.assertEqual(len(log), 0)
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py
index 647f489..345af8a 100644
--- a/tests/test_nearley/test_nearley.py
+++ b/tests/test_nearley/test_nearley.py
@@ -6,16 +6,17 @@ import logging
 import os
 import codecs
 
-logging.basicConfig(level=logging.INFO)
-
+from lark import LOGGER
 from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main
 
+LOGGER.setLevel(logging.INFO)
+
 TEST_PATH = os.path.abspath(os.path.dirname(__file__))
 NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley')
 BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin')
 
 if not os.path.exists(NEARLEY_PATH):
-    logging.warn("Nearley not installed. Skipping Nearley tests!")
+    LOGGER.warn("Nearley not installed. 
Skipping Nearley tests!") raise ImportError("Skipping Nearley tests!") import js2py # Ensures that js2py exists, to avoid failing tests diff --git a/tests/test_parser.py b/tests/test_parser.py index df09307..5a10b9f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -18,13 +18,13 @@ from io import ( open, ) -logging.basicConfig(level=logging.INFO) try: import regex except ImportError: regex = None +from lark import LOGGER from lark.lark import Lark from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.tree import Tree @@ -32,6 +32,7 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args from lark.grammar import Rule from lark.lexer import TerminalDef, Lexer, TraditionalLexer +LOGGER.setLevel(logging.INFO) __path__ = os.path.dirname(__file__) From a6201b41e471897ef044696925911df86b94a886 Mon Sep 17 00:00:00 2001 From: pwwang <1188067+pwwang@users.noreply.github.com> Date: Tue, 30 Jun 2020 17:35:26 -0700 Subject: [PATCH 02/25] Lowercase logger name --- lark/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/common.py b/lark/common.py index aac9d75..3bd7c98 100644 --- a/lark/common.py +++ b/lark/common.py @@ -2,7 +2,7 @@ import logging from .utils import Serialize from .lexer import TerminalDef -LOGGER = logging.getLogger("LARK") +LOGGER = logging.getLogger("lark") LOGGER.addHandler(logging.StreamHandler()) # Set to highest level, since we have some warnings amongst the code # By default, we should not output any log messages From 2a73afd3554c29f216869bc3e70f971f74b62c13 Mon Sep 17 00:00:00 2001 From: pwwang Date: Thu, 2 Jul 2020 19:28:45 -0500 Subject: [PATCH 03/25] Change LOGGER to logger --- docs/how_to_use.md | 6 +++--- lark/__init__.py | 2 +- lark/common.py | 6 +++--- lark/lark.py | 6 +++--- lark/parsers/earley.py | 4 ++-- lark/parsers/lalr_analysis.py | 6 +++--- tests/__main__.py | 8 ++++---- tests/test_logger.py | 26 +++++++++++++------------- tests/test_nearley/test_nearley.py | 6 +++--- tests/test_parser.py | 4 ++-- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/docs/how_to_use.md b/docs/how_to_use.md index 78f4df2..303098f 100644 --- a/docs/how_to_use.md +++ b/docs/how_to_use.md @@ -30,13 +30,13 @@ Use the reference pages for more in-depth explanations. (links in the [main page ## LALR usage -By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `LOGGER` beforehand. For example: +By default Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings pass `debug=True`. To get the messages printed you have to configure the `logger` beforehand. 
For example: ```python import logging -from lark import Lark, LOGGER +from lark import Lark, logger -LOGGER.setLevel(logging.DEBUG) +logger.setLevel(logging.DEBUG) collision_grammar = ''' start: as as diff --git a/lark/__init__.py b/lark/__init__.py index e4c54dd..e3021cf 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -1,4 +1,4 @@ -from .common import LOGGER +from .common import logger from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard from .visitors import InlineTransformer, inline_args # XXX Deprecated diff --git a/lark/common.py b/lark/common.py index 3bd7c98..745e287 100644 --- a/lark/common.py +++ b/lark/common.py @@ -2,11 +2,11 @@ import logging from .utils import Serialize from .lexer import TerminalDef -LOGGER = logging.getLogger("lark") -LOGGER.addHandler(logging.StreamHandler()) +logger = logging.getLogger("lark") +logger.addHandler(logging.StreamHandler()) # Set to highest level, since we have some warnings amongst the code # By default, we should not output any log messages -LOGGER.setLevel(logging.CRITICAL) +logger.setLevel(logging.CRITICAL) ###{standalone diff --git a/lark/lark.py b/lark/lark.py index 8df2b87..9bb60c8 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -7,7 +7,7 @@ from io import open from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS from .load_grammar import load_grammar from .tree import Tree -from .common import LexerConf, ParserConf, LOGGER +from .common import LexerConf, ParserConf, logger from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken from .parse_tree_builder import ParseTreeBuilder @@ -205,7 +205,7 @@ class Lark(Serialize): cache_fn = '.lark_cache_%s.tmp' % md5 if FS.exists(cache_fn): - LOGGER.debug('Loading grammar from cache: %s', cache_fn) + logger.debug('Loading grammar from cache: %s', cache_fn) with FS.open(cache_fn, 'rb') as f: self._load(f, self.options.transformer, self.options.postlex) return @@ -284,7 +284,7 @@ class Lark(Serialize): self.lexer = self._build_lexer() if cache_fn: - LOGGER.debug('Saving grammar to cache: %s', cache_fn) + logger.debug('Saving grammar to cache: %s', cache_fn) with FS.open(cache_fn, 'wb') as f: self.save(f) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 5fc7531..bf099e6 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -14,7 +14,7 @@ from collections import deque from ..visitors import Transformer_InPlace, v_args from ..exceptions import UnexpectedEOF, UnexpectedToken -from ..common import LOGGER +from ..common import logger from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item, TransitiveItem @@ -301,7 +301,7 @@ class Parser: try: debug_walker = ForestToPyDotVisitor() except ImportError: - LOGGER.warning("Cannot find dependency 'pydot', will not generate sppf debug image") + logger.warning("Cannot find dependency 'pydot', will not generate sppf debug image") else: debug_walker.visit(solutions[0], "sppf.png") diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 6fefa4c..861941f 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -10,7 +10,7 @@ from collections import defaultdict, deque from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator from ..exceptions import GrammarError -from ..common import LOGGER +from ..common import logger from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet from ..grammar import Rule @@ -256,8 +256,8 @@ class 
LALR_Analyzer(GrammarAnalyzer):
             raise GrammarError('Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ])))
         if la in actions:
             if self.debug:
-                LOGGER.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
-                LOGGER.warning(' * %s', list(rules)[0])
+                logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
+                logger.warning(' * %s', list(rules)[0])
         else:
             actions[la] = (Reduce, list(rules)[0])
         m[state] = { k.name: v for k, v in actions.items() }
diff --git a/tests/__main__.py b/tests/__main__.py
index 1807aa8..9ef9f1b 100644
--- a/tests/__main__.py
+++ b/tests/__main__.py
@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function
 
 import unittest
 import logging
-from lark import LOGGER
+from lark import logger
 
 from .test_trees import TestTrees
 from .test_tools import TestStandalone
@@ -12,12 +12,12 @@ from .test_reconstructor import TestReconstructor
 try:
     from .test_nearley.test_nearley import TestNearley
 except ImportError:
-    LOGGER.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")
+    logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)")
 
 # from .test_selectors import TestSelectors
 # from .test_grammars import TestPythonG, TestConfigG
 
-from .test_logger import TestLogger
+from .test_logger import Testlogger
 
 from .test_parser import (
         TestLalrStandard,
@@ -34,7 +34,7 @@ from .test_parser import (
         TestParsers,
 )
 
-LOGGER.setLevel(logging.INFO)
+logger.setLevel(logging.INFO)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/test_logger.py b/tests/test_logger.py
index dd6beb3..93dc8ed 100644
--- a/tests/test_logger.py
+++ b/tests/test_logger.py
@@ -1,6 +1,6 @@
 import logging
 from contextlib import contextmanager
-from lark import Lark, LOGGER
+from lark import Lark, logger
 from unittest import TestCase, main
 
 try:
@@ -11,17 +11,17 @@
 @contextmanager
 def capture_log():
     stream = StringIO()
-    orig_handler = LOGGER.handlers[0]
-    del LOGGER.handlers[:]
-    LOGGER.addHandler(logging.StreamHandler(stream))
+    orig_handler = logger.handlers[0]
+    del logger.handlers[:]
+    logger.addHandler(logging.StreamHandler(stream))
     yield stream
-    del LOGGER.handlers[:]
-    LOGGER.addHandler(orig_handler)
+    del logger.handlers[:]
+    logger.addHandler(orig_handler)
 
-class TestLogger(TestCase):
+class Testlogger(TestCase):
 
     def test_debug(self):
-        LOGGER.setLevel(logging.DEBUG)
+        logger.setLevel(logging.DEBUG)
         collision_grammar = '''
         start: as as
         as: a*
         a: "a"
         '''
@@ -31,12 +31,12 @@
             Lark(collision_grammar, parser='lalr', debug=True)
 
         log = log.getvalue()
-        self.assertIn("Shift/Reduce conflict for terminal", log)
-        self.assertIn("A: (resolving as shift)", log)
-        self.assertIn("Shift/Reduce conflict for terminal A: (resolving as shift)", log)
+        # since there are conflicts about A
+        # symbol A should appear in the log message for hint
+        self.assertIn("A", log)
 
     def test_non_debug(self):
-        LOGGER.setLevel(logging.DEBUG)
+        logger.setLevel(logging.DEBUG)
         collision_grammar = '''
         start: as as
         as: a*
         a: "a"
         '''
@@ -49,7 +49,7 @@
     def test_loglevel_higher(self):
-        LOGGER.setLevel(logging.ERROR)
+        logger.setLevel(logging.ERROR)
         collision_grammar = '''
         start: as as
         as: a*
         a: "a"
         '''
diff --git a/tests/test_nearley/test_nearley.py b/tests/test_nearley/test_nearley.py
index 345af8a..1ad6449 100644
--- a/tests/test_nearley/test_nearley.py
+++ 
b/tests/test_nearley/test_nearley.py @@ -6,17 +6,17 @@ import logging import os import codecs -from lark import LOGGER +from lark import logger from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main -LOGGER.setLevel(logging.INFO) +logger.setLevel(logging.INFO) TEST_PATH = os.path.abspath(os.path.dirname(__file__)) NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') if not os.path.exists(NEARLEY_PATH): - LOGGER.warn("Nearley not installed. Skipping Nearley tests!") + logger.warn("Nearley not installed. Skipping Nearley tests!") raise ImportError("Skipping Nearley tests!") import js2py # Ensures that js2py exists, to avoid failing tests diff --git a/tests/test_parser.py b/tests/test_parser.py index 5a10b9f..88d175f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,7 +24,7 @@ try: except ImportError: regex = None -from lark import LOGGER +from lark import logger from lark.lark import Lark from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters from lark.tree import Tree @@ -32,7 +32,7 @@ from lark.visitors import Transformer, Transformer_InPlace, v_args from lark.grammar import Rule from lark.lexer import TerminalDef, Lexer, TraditionalLexer -LOGGER.setLevel(logging.INFO) +logger.setLevel(logging.INFO) __path__ = os.path.dirname(__file__) From 438e89dea9cd886a4bc01738a224e6a0e5fbb519 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 8 Aug 2020 15:33:36 +0300 Subject: [PATCH 04/25] Fix readthedocs (Issue #640) --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index 6c22d89..8d2a562 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,3 +13,4 @@ pages: - Classes Reference: classes.md - Recipes: recipes.md - Import grammars from Nearley: nearley.md + - Tutorial - JSON Parser: json_tutorial.md From 61a7c1e20a6c6cbdbd23fdd20611075fe3147176 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 8 Aug 2020 15:43:20 +0300 Subject: [PATCH 05/25] Removed code that causes failure in Python 3.4 --- lark/exceptions.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 033275c..645b09c 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -72,11 +72,7 @@ class UnexpectedInput(LarkError): class UnexpectedCharacters(LexError, UnexpectedInput): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): - - if isinstance(seq, bytes): - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column) - else: - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) + message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) self.line = line self.column = column From 5954fdf87aa79c7369c040ade8dbdd04dff58ef7 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 8 Aug 2020 16:16:34 +0300 Subject: [PATCH 06/25] Restore bad code (needs better fix). Updated readme & docs. 
--- README.md | 7 +++---- docs/features.md | 2 +- lark/exceptions.py | 6 +++++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 18c181f..23ec565 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Most importantly, Lark will save you time and prevent you from getting parsing h ### Install Lark - $ pip install lark-parser + $ pip install lark-parser --upgrade Lark has no dependencies. @@ -77,12 +77,11 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt ### Fruit flies like bananas -Lark is great at handling ambiguity. Let's parse the phrase "fruit flies like bananas": +Lark is great at handling ambiguity. Here is the result of parsing the phrase "fruit flies like bananas": ![fruitflies.png](examples/fruitflies.png) -See more [examples here](https://github.com/lark-parser/lark/tree/master/examples) - +See the code and more [examples here](https://github.com/lark-parser/lark/tree/master/examples) ## List of main features diff --git a/docs/features.md b/docs/features.md index 9346989..c2f6983 100644 --- a/docs/features.md +++ b/docs/features.md @@ -19,8 +19,8 @@ [Read more about the parsers](parsers.md) # Extra features - - Import rules and tokens from other Lark grammars, for code reuse and modularity. + - Support for external regex module ([see here](/docs/classes.md#using-unicode-character-classes-with-regex)) - Import grammars from Nearley.js ([read more](/docs/nearley.md)) - CYK parser diff --git a/lark/exceptions.py b/lark/exceptions.py index 645b09c..a844dd4 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -72,7 +72,11 @@ class UnexpectedInput(LarkError): class UnexpectedCharacters(LexError, UnexpectedInput): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) + + if isinstance(seq, bytes): + message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column) + else: + message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) self.line = line self.column = column From 8dc8865072a526dbb70cd6f073668fe22c5680b8 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sat, 8 Aug 2020 16:21:01 +0300 Subject: [PATCH 07/25] [docs] Fixed links --- docs/features.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/features.md b/docs/features.md index c2f6983..00fdf4b 100644 --- a/docs/features.md +++ b/docs/features.md @@ -20,8 +20,8 @@ # Extra features - Import rules and tokens from other Lark grammars, for code reuse and modularity. - - Support for external regex module ([see here](/docs/classes.md#using-unicode-character-classes-with-regex)) - - Import grammars from Nearley.js ([read more](/docs/nearley.md)) + - Support for external regex module ([see here](classes.md#using-unicode-character-classes-with-regex)) + - Import grammars from Nearley.js ([read more](nearley.md)) - CYK parser ### Experimental features From b7068c45a73bc70d3f9611c81198f0aa5571c4d9 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 9 Aug 2020 12:05:07 +0300 Subject: [PATCH 08/25] Tiny fixes. Don't test use_bytes on Python 3.4. 
--- docs/index.md | 2 +- lark/visitors.py | 2 ++ tests/test_parser.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 1310be2..c72305d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -32,7 +32,7 @@ $ pip install lark-parser * [Philosophy & Design Choices](philosophy.md) -* [Full List of Features](features.md) +* [Features](features.md) * [Examples](https://github.com/lark-parser/lark/tree/master/examples) * [Online IDE](https://lark-parser.github.io/lark/ide/app.html) * Tutorials diff --git a/lark/visitors.py b/lark/visitors.py index 3f80016..6494deb 100644 --- a/lark/visitors.py +++ b/lark/visitors.py @@ -14,6 +14,8 @@ class Discard(Exception): # Transformers class _Decoratable: + "Provides support for decorating methods with @v_args" + @classmethod def _apply_decorator(cls, decorator, **kwargs): mro = getmro(cls) diff --git a/tests/test_parser.py b/tests/test_parser.py index f1e269f..cd3ea4d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -721,7 +721,8 @@ def _make_parser_test(LEXER, PARSER): """) g.parse('\x01\x02\x03') - @unittest.skipIf(sys.version_info[:2]==(2, 7), "bytes parser isn't perfect in Python2.7, exceptions don't work correctly") + @unittest.skipIf(sys.version_info[0]==2 or sys.version_info[:2]==(3, 4), + "bytes parser isn't perfect in Python2, exceptions don't work correctly") def test_bytes_utf8(self): g = r""" start: BOM? char+ From 9923987e94547ded8a17d7a03840c4cebce39188 Mon Sep 17 00:00:00 2001 From: decorator-factory <42166884+decorator-factory@users.noreply.github.com> Date: Mon, 10 Aug 2020 23:07:55 +0300 Subject: [PATCH 09/25] allow multiline regexes with 'x' (verbose) flag --- lark/load_grammar.py | 13 ++++++++++--- tests/test_parser.py | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index ae7ec32..d716ec1 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -13,7 +13,7 @@ from .parser_frontends import LALR_TraditionalLexer from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .utils import classify, suppress, dedup_list, Str -from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken +from .exceptions import GrammarError, LarkError, UnexpectedCharacters, UnexpectedToken from .tree import Tree, SlottedTree as ST from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive @@ -85,7 +85,7 @@ TERMINALS = { 'RULE': '!?[_?]?[a-z][_a-z0-9]*', 'TERMINAL': '_?[A-Z][_A-Z0-9]*', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', - 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS, + 'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS, '_NL': r'(\r?\n)+\s*', 'WS': r'[ \t]+', 'COMMENT': r'\s*//[^\n]*', @@ -336,7 +336,7 @@ class PrepareAnonTerminals(Transformer_InPlace): term_name = None elif isinstance(p, PatternRE): - if p in self.term_reverse: # Kind of a wierd placement.name + if p in self.term_reverse: # Kind of a weird placement.name term_name = self.term_reverse[p].name else: assert False, p @@ -409,6 +409,13 @@ def _literal_to_pattern(literal): flags = v[flag_start:] assert all(f in _RE_FLAGS for f in flags), flags + if literal.type == 'STRING' and '\n' in v: + raise GrammarError('You cannot put newlines in string literals') + + if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags: + raise GrammarError('You can only use newlines in regular expressions ' + 'with the `x` 
(verbose) flag')
+
     v = v[:flag_start]
     assert v[0] == v[-1] and v[0] in '"/'
     x = v[1:-1]
diff --git a/tests/test_parser.py b/tests/test_parser.py
index cd3ea4d..48a4674 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1262,6 +1262,32 @@ def _make_parser_test(LEXER, PARSER):
         tree = l.parse('aA')
         self.assertEqual(tree.children, ['a', 'A'])
 
+    def test_token_flags_verbose(self):
+        g = _Lark(r"""start: NL | ABC
+                      ABC: / [a-z] /x
+                      NL: /\n/
+                      """)
+        x = g.parse('a')
+        self.assertEqual(x.children, ['a'])
+
+    def test_token_flags_verbose_multiline(self):
+        g = _Lark(r"""start: ABC
+                      ABC: / a b c
+                               d
+                               e f
+                           /x
+                      """)
+        x = g.parse('abcdef')
+        self.assertEqual(x.children, ['abcdef'])
+
+    def test_token_multiline_only_works_with_x_flag(self):
+        g = r"""start: ABC
+                ABC: / a b c
+                         d
+                         e f
+                     /i
+                """
+        self.assertRaises( GrammarError, _Lark, g)
+
     @unittest.skipIf(PARSER == 'cyk', "No empty rules")
     def test_twice_empty(self):

From 8b59a1642533f1f577b104c7be33f0511193050d Mon Sep 17 00:00:00 2001
From: decorator-factory <42166884+decorator-factory@users.noreply.github.com>
Date: Tue, 11 Aug 2020 00:44:23 +0300
Subject: [PATCH 10/25] refactor: replace dict lookup with simple conditional

---
 lark/load_grammar.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index d716ec1..1a1a396 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -424,9 +424,11 @@ def _literal_to_pattern(literal):
 
     if literal.type == 'STRING':
         s = s.replace('\\\\', '\\')
-
-    return { 'STRING': PatternStr,
-             'REGEXP': PatternRE }[literal.type](s, flags)
+        return PatternStr(s, flags)
+    elif literal.type == 'REGEXP':
+        return PatternRE(s, flags)
+    else:
+        assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'
 
 
 @inline_args

From 2525e0ce9c594b81a79caa5ff57c66a12a79ca5a Mon Sep 17 00:00:00 2001
From: decorator-factory <42166884+decorator-factory@users.noreply.github.com>
Date: Tue, 11 Aug 2020 00:46:54 +0300
Subject: [PATCH 11/25] formatting: fix pistol operator

---
 lark/load_grammar.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 1a1a396..0ee546c 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -13,7 +13,7 @@ from .parser_frontends import LALR_TraditionalLexer
 from .common import LexerConf, ParserConf
 from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, suppress, dedup_list, Str
-from .exceptions import GrammarError, LarkError, UnexpectedCharacters, UnexpectedToken
+from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
 from .tree import Tree, SlottedTree as ST
 from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
@@ -850,7 +850,7 @@ class GrammarLoader:
         if len(stmt.children) > 1:
             path_node, arg1 = stmt.children
         else:
-            path_node ,= stmt.children
+            path_node, = stmt.children
             arg1 = None
 
         if isinstance(arg1, Tree):  # Multi import

From 28e0a86f389c329a35091b7acb7b0afc5d57dc74 Mon Sep 17 00:00:00 2001
From: MegaIng1
Date: Wed, 12 Aug 2020 14:48:55 +0200
Subject: [PATCH 12/25] Small improvements for debug info

---
 lark-stubs/exceptions.pyi | 15 ++++++++++-----
 lark/exceptions.py | 15 ++++++++++++---
 lark/parsers/lalr_puppet.py | 6 +++---
 3 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi
index f09bfbd..012ac51 100644
--- a/lark-stubs/exceptions.pyi
+++ 
b/lark-stubs/exceptions.pyi @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Dict, Iterable, Callable, Union +from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple from .tree import Tree from .lexer import Token @@ -21,6 +21,9 @@ class LexError(LarkError): pass +T = TypeVar('T') + + class UnexpectedInput(LarkError): pos_in_stream: int @@ -28,10 +31,12 @@ class UnexpectedInput(LarkError): ... def match_examples( - self, - parse_fn: Callable[[str], Tree], - examples: Dict[str, Iterable[str]] - ): + self, + parse_fn: Callable[[str], Tree], + examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], + token_type_match_fallback: bool = False, + print_debug_info: bool = True + ) -> T: ... diff --git a/lark/exceptions.py b/lark/exceptions.py index 033275c..47670a6 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -37,34 +37,43 @@ class UnexpectedInput(LarkError): after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn, examples, token_type_match_fallback=False): + def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True): """ Given a parser instance and a dictionary mapping some label with some malformed syntax examples, it'll return the label for the example that bests matches the current error. """ assert self.state is not None, "Not supported for this exception" + + if isinstance(examples, dict): + examples = examples.items() candidate = (None, False) - for label, example in examples.items(): + for i, (label, example) in enumerate(examples): assert not isinstance(example, STRING_TYPE) - for malformed in example: + for j, malformed in enumerate(example): try: parse_fn(malformed) except UnexpectedInput as ut: if ut.state == self.state: try: if ut.token == self.token: # Try exact match first + if print_debug_info: + print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state)) return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: + if print_debug_info: + print("Token Type Fallback at %d, with example %d" % (i, j)) candidate = label, True except AttributeError: pass if not candidate[0]: + if print_debug_info: + print("Defaulted at %d, with example %d" % (i, j)) candidate = label, False return candidate[0] diff --git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index 968783c..d5a4703 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -16,7 +16,7 @@ class ParserPuppet: self.result = None def feed_token(self, token): - """Advance the parser state, as if it just recieved `token` from the lexer + """Advance the parser state, as if it just received `token` from the lexer """ end_state = self.parser.parse_table.end_states[self._start] @@ -66,9 +66,9 @@ class ParserPuppet: self._set_state, ) - def pretty(): + def pretty(self): print("Puppet choices:") - for k, v in self.choices.items(): + for k, v in self.choices().items(): print('\t-', k, '->', v) print('stack size:', len(self._state_stack)) From a7bcd0bc2d3cb96030d9e77523c0007e8034ce49 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Wed, 12 Aug 2020 15:36:01 +0200 Subject: [PATCH 13/25] Added `accepts` attribute to `UnexpectedToken` and update stubs --- lark-stubs/exceptions.pyi | 15 ++++++++++----- lark/exceptions.py | 5 +++-- lark/parsers/lalr_parser.py | 13 +++++++++++-- 3 files 
changed, 24 insertions(+), 9 deletions(-) diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi index 012ac51..67c39fb 100644 --- a/lark-stubs/exceptions.pyi +++ b/lark-stubs/exceptions.pyi @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple +from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set from .tree import Tree from .lexer import Token @@ -25,7 +25,10 @@ T = TypeVar('T') class UnexpectedInput(LarkError): + line: int + column: int pos_in_stream: int + state: Any def get_context(self, text: str, span: int = ...): ... @@ -41,12 +44,14 @@ class UnexpectedInput(LarkError): class UnexpectedToken(ParseError, UnexpectedInput): - pass - + expected: List[str] + considered_rules: Set[str] + puppet: Any + accepts: List[str] class UnexpectedCharacters(LexError, UnexpectedInput): - line: int - column: int + allowed: Set[str] + considered_tokens: Set[Any] class VisitError(LarkError): diff --git a/lark/exceptions.py b/lark/exceptions.py index 47670a6..022a00f 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -105,7 +105,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput): - def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): + def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, accepts=None): self.token = token self.expected = expected # XXX str shouldn't necessary self.line = getattr(token, 'line', '?') @@ -114,10 +114,11 @@ class UnexpectedToken(ParseError, UnexpectedInput): self.state = state self.pos_in_stream = getattr(token, 'pos_in_stream', None) self.puppet = puppet + self.accepts = accepts message = ("Unexpected token %r at line %s, column %s.\n" "Expected one of: \n\t* %s\n" - % (token, self.line, self.column, '\n\t* '.join(self.expected))) + % (token, self.line, self.column, '\n\t* '.join(self.accepts or self.expected))) super(UnexpectedToken, self).__init__(message) diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index f26cbc5..f61e093 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -62,9 +62,18 @@ class _Parser: expected = [s for s in states[state].keys() if s.isupper()] try: puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) + accepts = [] + for t in expected: + new_puppet = puppet.copy() + try: + new_puppet.feed_token(Token(t, '')) + except KeyError: + pass + else: + accepts.append(t) except NameError: - puppet = None - raise UnexpectedToken(token, expected, state=state, puppet=puppet) + puppet = accepts = None + raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) def reduce(rule): size = len(rule.expansion) From d3b0449f714615b190699644650e41669a1510d4 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Wed, 12 Aug 2020 16:46:36 +0200 Subject: [PATCH 14/25] Improved `match_examples` with `UnexpectedToken.accepts` --- lark/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index 022a00f..497cf96 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -55,7 +55,7 @@ class UnexpectedInput(LarkError): try: parse_fn(malformed) except UnexpectedInput as ut: - if ut.state == self.state: + if ut.state == self.state and ut.accepts == self.accepts: try: if ut.token == self.token: # Try exact match first if print_debug_info: From 2e160c046e5de3d82b664d9867c1e9386ff4efb7 Mon Sep 17 
00:00:00 2001 From: MegaIng1 Date: Wed, 12 Aug 2020 16:52:21 +0200 Subject: [PATCH 15/25] Correction for python2.7 (LalrPuppet-> new style class) --- lark/parsers/lalr_puppet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index d5a4703..2b350bf 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -4,7 +4,7 @@ from copy import deepcopy from .lalr_analysis import Shift, Reduce -class ParserPuppet: +class ParserPuppet(object): def __init__(self, parser, state_stack, value_stack, start, stream, set_state): self.parser = parser self._state_stack = state_stack From cb2d9cded072e0f150b0d6d349fd431369b83a93 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Thu, 13 Aug 2020 03:51:01 +0200 Subject: [PATCH 16/25] Refactored ParserPuppet, added stubs --- lark-stubs/exceptions.pyi | 10 +++++----- lark-stubs/parsers/__init__.pyi | 0 lark-stubs/parsers/lalr_puppet.pyi | 21 +++++++++++++++++++++ lark/exceptions.py | 19 ++++++++++--------- lark/parsers/lalr_parser.py | 12 ++---------- lark/parsers/lalr_puppet.py | 21 ++++++++++++++++++--- 6 files changed, 56 insertions(+), 27 deletions(-) create mode 100644 lark-stubs/parsers/__init__.pyi create mode 100644 lark-stubs/parsers/lalr_puppet.pyi diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi index 67c39fb..268844c 100644 --- a/lark-stubs/exceptions.pyi +++ b/lark-stubs/exceptions.pyi @@ -3,7 +3,7 @@ from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set from .tree import Tree from .lexer import Token - +from .parsers.lalr_puppet import ParserPuppet class LarkError(Exception): pass @@ -38,16 +38,16 @@ class UnexpectedInput(LarkError): parse_fn: Callable[[str], Tree], examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool = False, - print_debug_info: bool = True + use_accepts: bool = False, ) -> T: ... class UnexpectedToken(ParseError, UnexpectedInput): - expected: List[str] + expected: Set[str] considered_rules: Set[str] - puppet: Any - accepts: List[str] + puppet: ParserPuppet + accepts: Set[str] class UnexpectedCharacters(LexError, UnexpectedInput): allowed: Set[str] diff --git a/lark-stubs/parsers/__init__.pyi b/lark-stubs/parsers/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lark-stubs/parsers/lalr_puppet.pyi b/lark-stubs/parsers/lalr_puppet.pyi new file mode 100644 index 0000000..c138c32 --- /dev/null +++ b/lark-stubs/parsers/lalr_puppet.pyi @@ -0,0 +1,21 @@ +from typing import Set, Dict, Any + +from lark import Token, Tree + + +class ParserPuppet(object): + """ + Represents a LalrParser that can be step through. + Shouldn't instantiated by hand, but is accessible as `UnexpectedToken.puppet` + """ + def feed_token(self, token: Token): ... + + def copy(self) -> ParserPuppet: ... + + def pretty(self) -> str: ... + + def choices(self) -> Dict[str, Any]: ... + + def accepts(self) -> Set[str]: ... + + def resume_parse(self) -> Tree: ... 
diff --git a/lark/exceptions.py b/lark/exceptions.py index 92ef64e..03f3da4 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,3 +1,5 @@ +import logging + from .utils import STRING_TYPE ###{standalone @@ -37,7 +39,7 @@ class UnexpectedInput(LarkError): after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True): + def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): """ Given a parser instance and a dictionary mapping some label with some malformed syntax examples, it'll return the label for the example that bests matches the current error. @@ -55,27 +57,26 @@ class UnexpectedInput(LarkError): try: parse_fn(malformed) except UnexpectedInput as ut: - if ut.state == self.state and ut.accepts == self.accepts: + if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts): try: if ut.token == self.token: # Try exact match first - if print_debug_info: - print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state)) + logging.debug("Exact Match at example [%s][%s]" % (i, j)) return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: - if print_debug_info: - print("Token Type Fallback at %d, with example %d" % (i, j)) + logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) candidate = label, True except AttributeError: pass if not candidate[0]: - if print_debug_info: - print("Defaulted at %d, with example %d" % (i, j)) + logging.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False - + elif ut.state == self.state: + logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + (self.state, self.accepts, ut.accepts, i, j)) return candidate[0] diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index f61e093..ba75606 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -59,18 +59,10 @@ class _Parser: try: return states[state][token.type] except KeyError: - expected = [s for s in states[state].keys() if s.isupper()] + expected = {s for s in states[state].keys() if s.isupper()} try: puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) - accepts = [] - for t in expected: - new_puppet = puppet.copy() - try: - new_puppet.feed_token(Token(t, '')) - except KeyError: - pass - else: - accepts.append(t) + accepts = puppet.accepts() except NameError: puppet = accepts = None raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) diff --git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index 2b350bf..24c77a1 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -3,6 +3,8 @@ from copy import deepcopy from .lalr_analysis import Shift, Reduce +from .. 
import Token + class ParserPuppet(object): def __init__(self, parser, state_stack, value_stack, start, stream, set_state): @@ -67,13 +69,26 @@ class ParserPuppet(object): ) def pretty(self): - print("Puppet choices:") + out = ["Puppet choices:"] for k, v in self.choices().items(): - print('\t-', k, '->', v) - print('stack size:', len(self._state_stack)) + out.append('\t- %s -> %s' % (k, v)) + out.append('stack size: %s' % len(self._state_stack)) + return '\n'.join(out) def choices(self): return self.parser.parse_table.states[self._state_stack[-1]] + def accepts(self): + accepts = set() + for t in self.choices(): + new_puppet = self.copy() + try: + new_puppet.feed_token(Token(t, '')) + except KeyError: + pass + else: + accepts.add(t) + return accepts + def resume_parse(self): return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack) From d4503374ff6171425c70a57899443cef10210553 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 13 Aug 2020 10:09:31 +0300 Subject: [PATCH 17/25] Small addition to docs --- README.md | 1 + docs/grammar.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 23ec565..69ccb2b 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,7 @@ Check out the [JSON tutorial](/docs/json_tutorial.md#conclusion) for more detail - [miniwdl](https://github.com/chanzuckerberg/miniwdl) - A static analysis toolkit for the Workflow Description Language - [pytreeview](https://gitlab.com/parmenti/pytreeview) - a lightweight tree-based grammar explorer - [harmalysis](https://github.com/napulen/harmalysis) - A language for harmonic analysis and music theory + - [gersemi](https://github.com/BlankSpruce/gersemi) - A CMake code formatter Using Lark? Send me a message and I'll add your project! diff --git a/docs/grammar.md b/docs/grammar.md index d4ecec5..ff6553f 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -112,6 +112,8 @@ Terminals can be assigned priority only when using a lexer (future versions may Priority can be either positive or negative. If not specified for a terminal, it defaults to 1. +Highest priority terminals are always matched first. + ### Regexp Flags You can use flags on regexps and strings. For example: From 02d57bc32a2fae1722ee3f8e003a3d6234e58190 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 13 Aug 2020 11:43:52 +0300 Subject: [PATCH 18/25] Small adjustments to PR --- lark-stubs/parsers/lalr_puppet.pyi | 5 ++-- lark/exceptions.py | 42 +++++++++++++++++------------- lark/parsers/lalr_parser.py | 7 +++-- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/lark-stubs/parsers/lalr_puppet.pyi b/lark-stubs/parsers/lalr_puppet.pyi index c138c32..f35112a 100644 --- a/lark-stubs/parsers/lalr_puppet.pyi +++ b/lark-stubs/parsers/lalr_puppet.pyi @@ -5,8 +5,9 @@ from lark import Token, Tree class ParserPuppet(object): """ - Represents a LalrParser that can be step through. - Shouldn't instantiated by hand, but is accessible as `UnexpectedToken.puppet` + Provides an interface to interactively step through the parser (LALR(1) only for now) + + Accessible via `UnexpectedToken.puppet` (raised by the parser on token error) """ def feed_token(self, token: Token): ... diff --git a/lark/exceptions.py b/lark/exceptions.py index 03f3da4..e1225a9 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -45,7 +45,7 @@ class UnexpectedInput(LarkError): example that bests matches the current error. 
""" assert self.state is not None, "Not supported for this exception" - + if isinstance(examples, dict): examples = examples.items() @@ -57,7 +57,11 @@ class UnexpectedInput(LarkError): try: parse_fn(malformed) except UnexpectedInput as ut: - if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts): + if ut.state == self.state: + if use_accepts and ut.accepts != self.accepts: + logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + (self.state, self.accepts, ut.accepts, i, j)) + continue try: if ut.token == self.token: # Try exact match first logging.debug("Exact Match at example [%s][%s]" % (i, j)) @@ -74,27 +78,25 @@ class UnexpectedInput(LarkError): if not candidate[0]: logging.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False - elif ut.state == self.state: - logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % - (self.state, self.accepts, ut.accepts, i, j)) + return candidate[0] class UnexpectedCharacters(LexError, UnexpectedInput): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): + self.line = line + self.column = column + self.pos_in_stream = lex_pos + self.state = state + + self.allowed = allowed + self.considered_tokens = considered_tokens if isinstance(seq, bytes): message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column) else: message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) - self.line = line - self.column = column - self.allowed = allowed - self.considered_tokens = considered_tokens - self.pos_in_stream = lex_pos - self.state = state - message += '\n\n' + self.get_context(seq) if allowed: message += '\nExpecting: %s\n' % allowed @@ -106,16 +108,20 @@ class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput): - def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, accepts=None): - self.token = token - self.expected = expected # XXX str shouldn't necessary + def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') - self.considered_rules = considered_rules - self.state = state self.pos_in_stream = getattr(token, 'pos_in_stream', None) + self.state = state + + self.token = token + self.expected = expected # XXX deprecate? 
`accepts` is better + self.considered_rules = considered_rules self.puppet = puppet - self.accepts = accepts + + # TODO Only calculate `accepts()` when we need to display it to the user + # This will improve performance when doing automatic error handling + self.accepts = puppet and puppet.accepts() message = ("Unexpected token %r at line %s, column %s.\n" "Expected one of: \n\t* %s\n" diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index ba75606..cf6a4bf 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -62,10 +62,9 @@ class _Parser: expected = {s for s in states[state].keys() if s.isupper()} try: puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) - accepts = puppet.accepts() - except NameError: - puppet = accepts = None - raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) + except NameError: # For standalone parser + puppet = None + raise UnexpectedToken(token, expected, state=state, puppet=puppet) def reduce(rule): size = len(rule.expansion) From 00e736fda3cebfc9766f293fcbf4826e7e7c8103 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 13 Aug 2020 11:48:05 +0300 Subject: [PATCH 19/25] Use accepts in default example (even though it's not necessary) --- examples/error_reporting_lalr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/error_reporting_lalr.py b/examples/error_reporting_lalr.py index 5e7d967..f038eda 100644 --- a/examples/error_reporting_lalr.py +++ b/examples/error_reporting_lalr.py @@ -52,7 +52,7 @@ def parse(json_text): '[1,2,]', '{"foo":1,}', '{"foo":false,"bar":true,}'] - }) + }, use_accepts=True) if not exc_class: raise raise exc_class(u.get_context(json_text), u.line, u.column) From 2c7afed894b362dc9b1ea13b658a6094f3c1e281 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 13 Aug 2020 11:55:44 +0300 Subject: [PATCH 20/25] Small fixes --- lark/exceptions.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index e1225a9..7330125 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -43,6 +43,8 @@ class UnexpectedInput(LarkError): """ Given a parser instance and a dictionary mapping some label with some malformed syntax examples, it'll return the label for the example that bests matches the current error. + + It's recommended to call this with `use_accepts=True`. The default is False for backwards compatibility. 
""" assert self.state is not None, "Not supported for this exception" @@ -93,10 +95,11 @@ class UnexpectedCharacters(LexError, UnexpectedInput): self.considered_tokens = considered_tokens if isinstance(seq, bytes): - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column) + _s = seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace") else: - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) + _s = seq[lex_pos] + message = "No terminal defined for '%s' at line %d col %d" % (_s, line, column) message += '\n\n' + self.get_context(seq) if allowed: message += '\nExpecting: %s\n' % allowed From 96873d64ba8ef85fcad1daa2dd2e9bf931eb06ba Mon Sep 17 00:00:00 2001 From: Blank Spruce <32396809+BlankSpruce@users.noreply.github.com> Date: Thu, 13 Aug 2020 18:09:05 +0200 Subject: [PATCH 21/25] Make transformer work with tokens in standalone parser, fixes #648 --- lark/common.py | 3 --- lark/lark.py | 9 ++++++++- lark/parser_frontends.py | 16 +++++++++++++--- tests/test_tools.py | 27 +++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/lark/common.py b/lark/common.py index cc8c73c..714399a 100644 --- a/lark/common.py +++ b/lark/common.py @@ -17,9 +17,6 @@ class LexerConf(Serialize): self.skip_validation = skip_validation self.use_bytes = use_bytes - def _deserialize(self): - self.callbacks = {} # TODO - ###} class ParserConf: diff --git a/lark/lark.py b/lark/lark.py index daab45b..3ed96d7 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -344,7 +344,14 @@ class Lark(Serialize): self.rules = [Rule.deserialize(r, memo) for r in data['rules']] self.source = '' self._prepare_callbacks() - self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex, re_module) + self.parser = self.parser_class.deserialize( + data['parser'], + memo, + self._callbacks, + self.options.postlex, + self.options.transformer, + re_module + ) return self @classmethod diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 33ad9bc..a45bf9c 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -1,6 +1,6 @@ from .utils import get_regexp_width, Serialize from .parsers.grammar_analysis import GrammarAnalyzer -from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token +from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token, TerminalDef from .parsers import earley, xearley, cyk from .parsers.lalr_parser import LALR_Parser from .grammar import Rule @@ -58,6 +58,16 @@ class _ParserFrontend(Serialize): return self.parser.parse(input, start, *args) +def _recreate_lexer_callbacks(memo, transformer): + result = {} + terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] + for terminal in terminals: + callback = getattr(transformer, terminal.name, None) + if callback is not None: + result[terminal.name] = callback + return result + + class WithLexer(_ParserFrontend): lexer = None parser = None @@ -73,10 +83,11 @@ class WithLexer(_ParserFrontend): self.postlex = lexer_conf.postlex @classmethod - def deserialize(cls, data, memo, callbacks, postlex, re_module): + def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module): inst = super(WithLexer, cls).deserialize(data, memo) inst.postlex = postlex inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) + inst.lexer_conf.callbacks = _recreate_lexer_callbacks(memo, transformer) 
inst.lexer_conf.re_module = re_module inst.lexer_conf.skip_validation=True inst.init_lexer() @@ -229,4 +240,3 @@ class CYK(WithLexer): def _apply_callback(self, tree): return self.callbacks[tree.rule](tree.children) - diff --git a/tests/test_tools.py b/tests/test_tools.py index 1e0d78e..e691237 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -106,6 +106,33 @@ class TestStandalone(TestCase): x = l.parse('(\n)\n') self.assertEqual(x, Tree('start', [])) + def test_transformer(self): + grammar = r""" + start: some_rule "(" SOME_TERMINAL ")" + some_rule: SOME_TERMINAL + SOME_TERMINAL: /[A-Za-z_][A-Za-z0-9_]*/ + """ + context = self._create_standalone(grammar) + _Lark = context["Lark_StandAlone"] + + _Token = context["Token"] + _Tree = context["Tree"] + + class MyTransformer(context["Transformer"]): + def SOME_TERMINAL(self, token): + return _Token("SOME_TERMINAL", "token is transformed") + + def some_rule(self, children): + return _Tree("rule_is_transformed", []) + + parser = _Lark(transformer=MyTransformer()) + self.assertEqual( + parser.parse("FOO(BAR)"), + _Tree("start", [ + _Tree("rule_is_transformed", []), + _Token("SOME_TERMINAL", "token is transformed") + ]) + ) if __name__ == '__main__': From 2f4831f9b6dd857dcb3b8d53a8839474d3c5e5f7 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 13 Aug 2020 21:13:42 +0300 Subject: [PATCH 22/25] Small refactor after PR --- lark/lark.py | 12 +++++------- lark/parser_frontends.py | 9 ++++++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/lark/lark.py b/lark/lark.py index 3ed96d7..8371943 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -11,7 +11,7 @@ from .common import LexerConf, ParserConf from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken from .parse_tree_builder import ParseTreeBuilder -from .parser_frontends import get_frontend +from .parser_frontends import get_frontend, _get_lexer_callbacks from .grammar import Rule import re @@ -278,12 +278,10 @@ class Lark(Serialize): rule.options.priority = None # TODO Deprecate lexer_callbacks? 
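The hunk below swaps Lark.__init__'s open-coded transformer scan for the shared `_get_lexer_callbacks` helper. Two behaviors worth noting: any transformer method named after a terminal becomes a lexer callback, and after this rewrite an explicit `lexer_callbacks` entry wins over a transformer method of the same name, since the options dict is merged in last via `update`. A minimal sketch of the feature (hypothetical grammar and class names):

```python
from lark import Lark, Transformer, Token

grammar = """
start: WORD+
%import common.WORD
%import common.WS
%ignore WS
"""

class Upcase(Transformer):
    # Named after the WORD terminal, so it runs on every WORD token at lex time
    def WORD(self, tok):
        return Token('WORD', tok.upper())

parser = Lark(grammar, parser='lalr', transformer=Upcase())
print(parser.parse('hello world'))  # WORD children arrive upper-cased
```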
- lexer_callbacks = dict(self.options.lexer_callbacks) - if self.options.transformer: - t = self.options.transformer - for term in self.terminals: - if hasattr(t, term.name): - lexer_callbacks[term.name] = getattr(t, term.name) + lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals) + if self.options.transformer + else {}) + lexer_callbacks.update(self.options.lexer_callbacks) self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index a45bf9c..b993b9f 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -58,9 +58,8 @@ class _ParserFrontend(Serialize): return self.parser.parse(input, start, *args) -def _recreate_lexer_callbacks(memo, transformer): +def _get_lexer_callbacks(transformer, terminals): result = {} - terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] for terminal in terminals: callback = getattr(transformer, terminal.name, None) if callback is not None: @@ -85,12 +84,16 @@ class WithLexer(_ParserFrontend): @classmethod def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module): inst = super(WithLexer, cls).deserialize(data, memo) + inst.postlex = postlex inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) - inst.lexer_conf.callbacks = _recreate_lexer_callbacks(memo, transformer) + + terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] + inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals) inst.lexer_conf.re_module = re_module inst.lexer_conf.skip_validation=True inst.init_lexer() + return inst def _serialize(self, data, memo): From 5559b1a21167c662c385e47e52f27c0cc470c278 Mon Sep 17 00:00:00 2001 From: Blank Spruce <32396809+BlankSpruce@users.noreply.github.com> Date: Fri, 14 Aug 2020 12:08:02 +0200 Subject: [PATCH 23/25] Add missing elements in standalone parser Add: - missing imports - __version__ variable Additionally regenerated json parser example --- examples/standalone/json_parser.py | 178 ++++++++++++++++++++--------- lark/exceptions.py | 5 +- lark/tools/standalone.py | 2 + lark/tree.py | 4 +- 4 files changed, 134 insertions(+), 55 deletions(-) diff --git a/examples/standalone/json_parser.py b/examples/standalone/json_parser.py index c9a5147..cadc51d 100644 --- a/examples/standalone/json_parser.py +++ b/examples/standalone/json_parser.py @@ -1,4 +1,6 @@ # The file was automatically generated by Lark v0.9.0 +__version__ = "0.9.0" + # # # Lark Stand-alone Generator Tool @@ -27,6 +29,9 @@ import os from io import open +import logging + + class LarkError(Exception): pass @@ -54,38 +59,55 @@ class UnexpectedInput(LarkError): pos = self.pos_in_stream start = max(pos - span, 0) end = pos + span - before = text[start:pos].rsplit('\n', 1)[-1] - after = text[pos:end].split('\n', 1)[0] - return before + after + '\n' + ' ' * len(before) + '^\n' + if not isinstance(text, bytes): + before = text[start:pos].rsplit('\n', 1)[-1] + after = text[pos:end].split('\n', 1)[0] + return before + after + '\n' + ' ' * len(before) + '^\n' + else: + before = text[start:pos].rsplit(b'\n', 1)[-1] + after = text[pos:end].split(b'\n', 1)[0] + return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn, examples, token_type_match_fallback=False): + def match_examples(self, parse_fn, examples, 
token_type_match_fallback=False, use_accepts=False): """ Given a parser instance and a dictionary mapping some label to some malformed syntax examples, it'll return the label for the example that best matches the current error. + + It's recommended to call this with `use_accepts=True`. The default is False for backwards compatibility. """ assert self.state is not None, "Not supported for this exception" + if isinstance(examples, dict): + examples = examples.items() + candidate = (None, False) - for label, example in examples.items(): + for i, (label, example) in enumerate(examples): assert not isinstance(example, STRING_TYPE) - for malformed in example: + for j, malformed in enumerate(example): try: parse_fn(malformed) except UnexpectedInput as ut: if ut.state == self.state: + if use_accepts and ut.accepts != self.accepts: + logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + (self.state, self.accepts, ut.accepts, i, j)) + continue try: if ut.token == self.token: # Try exact match first + logging.debug("Exact Match at example [%s][%s]" % (i, j)) return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: + logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) candidate = label, True except AttributeError: pass if not candidate[0]: + logging.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False return candidate[0] @@ -93,15 +115,20 @@ class UnexpectedInput(LarkError): class UnexpectedCharacters(LexError, UnexpectedInput): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): - message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) - self.line = line self.column = column - self.allowed = allowed - self.considered_tokens = considered_tokens self.pos_in_stream = lex_pos self.state = state + self.allowed = allowed + self.considered_tokens = considered_tokens + + if isinstance(seq, bytes): + _s = seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace") + else: + _s = seq[lex_pos] + + message = "No terminal defined for '%s' at line %d col %d" % (_s, line, column) message += '\n\n' + self.get_context(seq) if allowed: message += '\nExpecting: %s\n' % allowed @@ -114,18 +141,23 @@ class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput): def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): - self.token = token - self.expected = expected # XXX str shouldn't necessary self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') - self.considered_rules = considered_rules - self.state = state self.pos_in_stream = getattr(token, 'pos_in_stream', None) + self.state = state + + self.token = token + self.expected = expected # XXX deprecate?
`accepts` is better + self.considered_rules = considered_rules self.puppet = puppet + # TODO Only calculate `accepts()` when we need to display it to the user + # This will improve performance when doing automatic error handling + self.accepts = puppet and puppet.accepts() + message = ("Unexpected token %r at line %s, column %s.\n" "Expected one of: \n\t* %s\n" - % (token, self.line, self.column, '\n\t* '.join(self.expected))) + % (token, self.line, self.column, '\n\t* '.join(self.accepts or self.expected))) super(UnexpectedToken, self).__init__(message) @@ -286,6 +318,9 @@ def get_regexp_width(expr): raise ValueError(expr) +from collections import OrderedDict + + class Meta: def __init__(self): self.empty = True @@ -364,6 +399,8 @@ class Discard(Exception): # Transformers class _Decoratable: + "Provides support for decorating methods with @v_args" + @classmethod def _apply_decorator(cls, decorator, **kwargs): mro = getmro(cls) @@ -978,8 +1015,7 @@ class Token(Str): try: self = super(Token, cls).__new__(cls, value) except UnicodeDecodeError: - # value = value.decode('latin1') - value = value.decode("ascii", "backslashreplace") + value = value.decode('latin1') self = super(Token, cls).__new__(cls, value) self.type = type_ @@ -1022,8 +1058,8 @@ class Token(Str): class LineCounter: - def __init__(self): - self.newline_char = '\n' + def __init__(self, newline_char): + self.newline_char = newline_char self.char_pos = 0 self.line = 1 self.column = 1 @@ -1052,7 +1088,7 @@ class _Lex: def lex(self, stream, newline_types, ignore_types): newline_types = frozenset(newline_types) ignore_types = frozenset(ignore_types) - line_ctr = LineCounter() + line_ctr = LineCounter('\n' if not self.lexer.use_bytes else b'\n') last_token = None while line_ctr.char_pos < len(stream): @@ -1113,7 +1149,7 @@ class CallChain: -def _create_unless(terminals, g_regex_flags, re_): +def _create_unless(terminals, g_regex_flags, re_, use_bytes): tokens_by_type = classify(terminals, lambda t: type(t.pattern)) assert len(tokens_by_type) <= 2, tokens_by_type.keys() embedded_strs = set() @@ -1130,31 +1166,34 @@ def _create_unless(terminals, g_regex_flags, re_): if strtok.pattern.flags <= retok.pattern.flags: embedded_strs.add(strtok) if unless: - callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True)) + callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes)) terminals = [t for t in terminals if t not in embedded_strs] return terminals, callback -def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_): +def _build_mres(terminals, max_size, g_regex_flags, match_whole, re_, use_bytes): # Python sets an unreasonable group limit (currently 100) in its re module # Worse, the only way to know we reached it is by catching an AssertionError! # This function recursively tries less and less groups until it's successful. postfix = '$' if match_whole else '' mres = [] while terminals: + pattern = u'|'.join(u'(?P<%s>%s)' % (t.name, t.pattern.to_regexp() + postfix) for t in terminals[:max_size]) + if use_bytes: + pattern = pattern.encode('latin-1') try: - mre = re_.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags) + mre = re_.compile(pattern, g_regex_flags) except AssertionError: # Yes, this is what Python provides us.. 
:/ - return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_) + return _build_mres(terminals, max_size//2, g_regex_flags, match_whole, re_, use_bytes) # terms_from_name = {t.name: t for t in terminals[:max_size]} mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) terminals = terminals[max_size:] return mres -def build_mres(terminals, g_regex_flags, re_, match_whole=False): - return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_) +def build_mres(terminals, g_regex_flags, re_, use_bytes, match_whole=False): + return _build_mres(terminals, len(terminals), g_regex_flags, match_whole, re_, use_bytes) def _regexp_has_newline(r): r"""Expressions that may indicate newlines in a regexp: @@ -1204,12 +1243,13 @@ class TraditionalLexer(Lexer): self.terminals = terminals self.user_callbacks = conf.callbacks self.g_regex_flags = conf.g_regex_flags + self.use_bytes = conf.use_bytes self._mres = None # self.build(g_regex_flags) def _build(self): - terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re) + terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re, use_bytes=self.use_bytes) assert all(self.callback.values()) for type_, f in self.user_callbacks.items(): @@ -1219,7 +1259,7 @@ class TraditionalLexer(Lexer): else: self.callback[type_] = f - self._mres = build_mres(terminals, self.g_regex_flags, self.re) + self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes) @property def mres(self): @@ -1248,7 +1288,8 @@ class ContextualLexer(Lexer): assert t.name not in tokens_by_name, t tokens_by_name[t.name] = t - trad_conf = type(conf)(terminals, conf.re_module, conf.ignore, callbacks=conf.callbacks, g_regex_flags=conf.g_regex_flags, skip_validation=conf.skip_validation) + trad_conf = copy(conf) + trad_conf.tokens = terminals lexer_by_tokens = {} self.lexers = {} @@ -1293,10 +1334,10 @@ class ContextualLexer(Lexer): class LexerConf(Serialize): - __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags' + __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags', 'use_bytes' __serialize_namespace__ = TerminalDef, - def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False): + def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): self.tokens = tokens # TODO should be terminals self.ignore = ignore self.postlex = postlex @@ -1304,9 +1345,7 @@ class LexerConf(Serialize): self.g_regex_flags = g_regex_flags self.re_module = re_module self.skip_validation = skip_validation - - def _deserialize(self): - self.callbacks = {} # TODO + self.use_bytes = use_bytes from functools import partial, wraps @@ -1627,10 +1666,10 @@ class _Parser: try: return states[state][token.type] except KeyError: - expected = [s for s in states[state].keys() if s.isupper()] + expected = {s for s in states[state].keys() if s.isupper()} try: puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) - except NameError: + except NameError: # For standalone parser puppet = None raise UnexpectedToken(token, expected, state=state, puppet=puppet) @@ -1760,7 +1799,14 @@ def get_frontend(parser, lexer): elif lexer == 'contextual': return LALR_ContextualLexer elif issubclass(lexer, Lexer): - return partial(LALR_CustomLexer, lexer) + class LALR_CustomLexerWrapper(LALR_CustomLexer): + def __init__(self, lexer_conf, parser_conf, options=None): + 
super(LALR_CustomLexerWrapper, self).__init__( + lexer, lexer_conf, parser_conf, options=options) + def init_lexer(self): + self.lexer = lexer(self.lexer_conf) + + return LALR_CustomLexerWrapper else: raise ValueError('Unknown lexer: %s' % lexer) elif parser=='earley': @@ -1793,6 +1839,15 @@ class _ParserFrontend(Serialize): return self.parser.parse(input, start, *args) +def _get_lexer_callbacks(transformer, terminals): + result = {} + for terminal in terminals: + callback = getattr(transformer, terminal.name, None) + if callback is not None: + result[terminal.name] = callback + return result + + class WithLexer(_ParserFrontend): lexer = None parser = None @@ -1808,13 +1863,18 @@ class WithLexer(_ParserFrontend): self.postlex = lexer_conf.postlex @classmethod - def deserialize(cls, data, memo, callbacks, postlex, re_module): + def deserialize(cls, data, memo, callbacks, postlex, transformer, re_module): inst = super(WithLexer, cls).deserialize(data, memo) + inst.postlex = postlex inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) + + terminals = [item for item in memo.values() if isinstance(item, TerminalDef)] + inst.lexer_conf.callbacks = _get_lexer_callbacks(transformer, terminals) inst.lexer_conf.re_module = re_module inst.lexer_conf.skip_validation=True inst.init_lexer() + return inst def _serialize(self, data, memo): @@ -1922,6 +1982,7 @@ class LarkOptions(Serialize): invert (Default: auto) lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. + use_bytes - Accept an input of type `bytes` instead of `str` (Python 3 only). edit_terminals - A callback """ if __doc__: @@ -1945,6 +2006,7 @@ class LarkOptions(Serialize): 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, + 'use_bytes': False, } def __init__(self, options_dict): @@ -1954,7 +2016,7 @@ class LarkOptions(Serialize): for name, default in self._defaults.items(): if name in o: value = o.pop(name) - if isinstance(default, bool) and name != 'cache': + if isinstance(default, bool) and name not in ('cache', 'use_bytes'): value = bool(value) else: value = default @@ -2027,6 +2089,13 @@ class Lark(Serialize): grammar = read() assert isinstance(grammar, STRING_TYPE) + self.grammar_source = grammar + if self.options.use_bytes: + if not isascii(grammar): + raise ValueError("Grammar must be ascii only, when use_bytes=True") + if sys.version_info[0] == 2 and self.options.use_bytes != 'force': + raise NotImplementedError("`use_bytes=True` may have issues on python2." + "Use `use_bytes='force'` to use it at your own risk.") cache_fn = None if self.options.cache: @@ -2036,7 +2105,7 @@ class Lark(Serialize): cache_fn = self.options.cache else: if self.options.cache is not True: - raise ValueError("cache must be bool or str") + raise ValueError("cache argument must be bool or str") unhashable = ('transformer', 'postlex', 'lexer_callbacks', 'edit_terminals') from . import __version__ options_str = ''.join(k+str(v) for k, v in options.items() if k not in unhashable) @@ -2092,7 +2161,7 @@ class Lark(Serialize): for t in self.terminals: self.options.edit_terminals(t) - self._terminals_dict = {t.name:t for t in self.terminals} + self._terminals_dict = {t.name: t for t in self.terminals} # If the user asked to invert the priorities, negate them all here. # This replaces the old 'resolve__antiscore_sum' option. @@ -2109,14 +2178,12 @@ class Lark(Serialize): rule.options.priority = None # TODO Deprecate lexer_callbacks? 
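Alongside the callback changes, this regenerated module carries the new `use_bytes` option through `LexerConf` and the lexer builders: the joined terminal patterns are encoded as latin-1, the input is sliced as bytes, and the grammar itself must be ASCII-only. A minimal sketch of the intended usage (hypothetical grammar):

```python
from lark import Lark

# With use_bytes=True the lexer compiles its regexps over bytes, so
# parse() takes a bytes object instead of str (grammar must be ASCII-only).
parser = Lark(r'start: /[a-z]+/', parser='lalr', use_bytes=True)
tree = parser.parse(b'hello')
```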
- lexer_callbacks = dict(self.options.lexer_callbacks) - if self.options.transformer: - t = self.options.transformer - for term in self.terminals: - if hasattr(t, term.name): - lexer_callbacks[term.name] = getattr(t, term.name) + lexer_callbacks = (_get_lexer_callbacks(self.options.transformer, self.terminals) + if self.options.transformer + else {}) + lexer_callbacks.update(self.options.lexer_callbacks) - self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags) + self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes) if self.options.parser: self.parser = self._build_parser() @@ -2175,7 +2242,14 @@ class Lark(Serialize): self.rules = [Rule.deserialize(r, memo) for r in data['rules']] self.source = '' self._prepare_callbacks() - self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex, re_module) + self.parser = self.parser_class.deserialize( + data['parser'], + memo, + self._callbacks, + self.options.postlex, + self.options.transformer, + re_module + ) return self @classmethod @@ -2244,10 +2318,10 @@ class Lark(Serialize): DATA = ( -{'rules': [{'@': 23}, {'@': 31}, {'@': 26}, {'@': 13}, {'@': 24}, {'@': 19}, {'@': 14}, {'@': 27}, {'@': 28}, {'@': 16}, {'@': 29}, {'@': 12}, {'@': 25}, {'@': 30}, {'@': 20}, {'@': 22}, {'@': 15}, {'@': 21}, {'@': 17}, {'@': 18}], 'parser': {'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': [u'WS'], 'g_regex_flags': 0, '__type__': 'LexerConf'}, 'parser': {'tokens': {0: 'COMMA', 1: 'RSQB', 2: 'RBRACE', 3: '$END', 4: 'LBRACE', 5: u'FALSE', 6: u'string', 7: u'object', 8: u'NULL', 9: u'SIGNED_NUMBER', 10: u'value', 11: u'array', 12: u'ESCAPED_STRING', 13: u'TRUE', 14: 'LSQB', 15: 'COLON', 16: u'pair', 17: u'__array_star_0', 18: u'__object_star_1', 19: 'start'}, 'states': {0: {0: (1, {'@': 12}), 1: (1, {'@': 12}), 2: (1, {'@': 12}), 3: (1, {'@': 12})}, 1: {1: (0, 29), 4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 27), 9: (0, 24), 10: (0, 6), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 2: {0: (0, 23), 2: (0, 0)}, 3: {15: (0, 12)}, 4: {16: (0, 13), 12: (0, 21), 6: (0, 3)}, 5: {0: (1, {'@': 13}), 1: (1, {'@': 13}), 2: (1, {'@': 13}), 3: (1, {'@': 13})}, 6: {0: (0, 7), 1: (0, 11), 17: (0, 17)}, 7: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 27), 9: (0, 24), 10: (0, 9), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 8: {0: (1, {'@': 14}), 1: (1, {'@': 14}), 2: (1, {'@': 14}), 3: (1, {'@': 14})}, 9: {0: (1, {'@': 15}), 1: (1, {'@': 15})}, 10: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 27), 9: (0, 24), 10: (0, 20), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 11: {0: (1, {'@': 16}), 1: (1, {'@': 16}), 2: (1, {'@': 16}), 3: (1, {'@': 16})}, 12: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 27), 9: (0, 24), 10: (0, 18), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1)}, 13: {0: (1, {'@': 17}), 2: (1, {'@': 17})}, 14: {}, 15: {0: (1, {'@': 18}), 2: (1, {'@': 18})}, 16: {0: (1, {'@': 19}), 1: (1, {'@': 19}), 2: (1, {'@': 19}), 3: (1, {'@': 19})}, 17: {0: (0, 10), 1: (0, 28)}, 18: {0: (1, {'@': 20}), 2: (1, {'@': 20})}, 19: {0: (0, 4), 18: (0, 2), 2: (0, 25)}, 20: {0: (1, {'@': 21}), 1: (1, {'@': 21})}, 21: {0: (1, {'@': 22}), 1: (1, {'@': 22}), 2: (1, {'@': 22}), 
3: (1, {'@': 22}), 15: (1, {'@': 22})}, 22: {3: (1, {'@': 23})}, 23: {16: (0, 15), 12: (0, 21), 6: (0, 3)}, 24: {0: (1, {'@': 24}), 1: (1, {'@': 24}), 2: (1, {'@': 24}), 3: (1, {'@': 24})}, 25: {0: (1, {'@': 25}), 1: (1, {'@': 25}), 2: (1, {'@': 25}), 3: (1, {'@': 25})}, 26: {0: (1, {'@': 26}), 1: (1, {'@': 26}), 2: (1, {'@': 26}), 3: (1, {'@': 26})}, 27: {0: (1, {'@': 27}), 1: (1, {'@': 27}), 2: (1, {'@': 27}), 3: (1, {'@': 27})}, 28: {0: (1, {'@': 28}), 1: (1, {'@': 28}), 2: (1, {'@': 28}), 3: (1, {'@': 28})}, 29: {0: (1, {'@': 29}), 1: (1, {'@': 29}), 2: (1, {'@': 29}), 3: (1, {'@': 29})}, 30: {0: (1, {'@': 30}), 1: (1, {'@': 30}), 2: (1, {'@': 30}), 3: (1, {'@': 30})}, 31: {0: (1, {'@': 31}), 1: (1, {'@': 31}), 2: (1, {'@': 31}), 3: (1, {'@': 31})}, 32: {4: (0, 33), 5: (0, 8), 6: (0, 5), 7: (0, 31), 8: (0, 27), 9: (0, 24), 10: (0, 22), 11: (0, 26), 12: (0, 21), 13: (0, 16), 14: (0, 1), 19: (0, 14)}, 33: {16: (0, 19), 2: (0, 30), 12: (0, 21), 6: (0, 3)}}, 'end_states': {'start': 14}, 'start_states': {'start': 32}}, '__type__': 'LALR_ContextualLexer', 'start': ['start']}, '__type__': 'Lark', 'options': {'regex': False, 'transformer': None, 'lexer': 'contextual', 'lexer_callbacks': {}, 'start': ['start'], 'debug': False, 'postlex': None, 'parser': 'lalr', 'tree_class': None, 'priority': None, 'cache': False, 'g_regex_flags': 0, 'keep_all_tokens': False, 'ambiguity': 'auto', 'edit_terminals': None, 'propagate_positions': False, 'maybe_placeholders': False}} +{'parser': {'parser': {'tokens': {0: 'RSQB', 1: 'COMMA', 2: '$END', 3: 'RBRACE', 4: 'ESCAPED_STRING', 5: 'string', 6: 'pair', 7: 'LSQB', 8: 'LBRACE', 9: 'SIGNED_NUMBER', 10: 'NULL', 11: 'FALSE', 12: 'value', 13: 'array', 14: 'object', 15: 'TRUE', 16: '__array_star_0', 17: 'COLON', 18: '__object_star_1', 19: 'start'}, 'states': {0: {0: (1, {'@': 12}), 1: (1, {'@': 12}), 2: (1, {'@': 12}), 3: (1, {'@': 12})}, 1: {0: (1, {'@': 13}), 1: (1, {'@': 13}), 2: (1, {'@': 13}), 3: (1, {'@': 13})}, 2: {1: (0, 25), 0: (0, 19)}, 3: {0: (1, {'@': 14}), 1: (1, {'@': 14}), 2: (1, {'@': 14}), 3: (1, {'@': 14})}, 4: {4: (0, 31), 5: (0, 13), 6: (0, 26)}, 5: {0: (1, {'@': 15}), 1: (1, {'@': 15}), 2: (1, {'@': 15}), 3: (1, {'@': 15})}, 6: {0: (1, {'@': 16}), 1: (1, {'@': 16}), 2: (1, {'@': 16}), 3: (1, {'@': 16})}, 7: {0: (1, {'@': 17}), 1: (1, {'@': 17}), 2: (1, {'@': 17}), 3: (1, {'@': 17})}, 8: {1: (0, 14), 3: (0, 28)}, 9: {0: (0, 21), 7: (0, 9), 8: (0, 18), 9: (0, 0), 10: (0, 1), 11: (0, 29), 5: (0, 5), 12: (0, 10), 13: (0, 7), 14: (0, 33), 4: (0, 31), 15: (0, 24)}, 10: {1: (0, 20), 16: (0, 2), 0: (0, 3)}, 11: {0: (1, {'@': 18}), 1: (1, {'@': 18})}, 12: {2: (1, {'@': 19})}, 13: {17: (0, 32)}, 14: {5: (0, 13), 4: (0, 31), 6: (0, 23)}, 15: {18: (0, 8), 1: (0, 4), 3: (0, 17)}, 16: {0: (1, {'@': 20}), 1: (1, {'@': 20})}, 17: {0: (1, {'@': 21}), 1: (1, {'@': 21}), 2: (1, {'@': 21}), 3: (1, {'@': 21})}, 18: {4: (0, 31), 6: (0, 15), 5: (0, 13), 3: (0, 6)}, 19: {0: (1, {'@': 22}), 1: (1, {'@': 22}), 2: (1, {'@': 22}), 3: (1, {'@': 22})}, 20: {7: (0, 9), 8: (0, 18), 12: (0, 11), 9: (0, 0), 14: (0, 33), 10: (0, 1), 4: (0, 31), 15: (0, 24), 5: (0, 5), 11: (0, 29), 13: (0, 7)}, 21: {0: (1, {'@': 23}), 1: (1, {'@': 23}), 2: (1, {'@': 23}), 3: (1, {'@': 23})}, 22: {1: (1, {'@': 24}), 3: (1, {'@': 24})}, 23: {1: (1, {'@': 25}), 3: (1, {'@': 25})}, 24: {0: (1, {'@': 26}), 1: (1, {'@': 26}), 2: (1, {'@': 26}), 3: (1, {'@': 26})}, 25: {7: (0, 9), 12: (0, 16), 8: (0, 18), 9: (0, 0), 14: (0, 33), 10: (0, 1), 4: (0, 31), 15: (0, 24), 5: (0, 5), 11: (0, 29), 13: (0, 7)}, 26: 
{1: (1, {'@': 27}), 3: (1, {'@': 27})}, 27: {7: (0, 9), 8: (0, 18), 12: (0, 12), 9: (0, 0), 10: (0, 1), 11: (0, 29), 5: (0, 5), 13: (0, 7), 14: (0, 33), 4: (0, 31), 15: (0, 24), 19: (0, 30)}, 28: {0: (1, {'@': 28}), 1: (1, {'@': 28}), 2: (1, {'@': 28}), 3: (1, {'@': 28})}, 29: {0: (1, {'@': 29}), 1: (1, {'@': 29}), 2: (1, {'@': 29}), 3: (1, {'@': 29})}, 30: {}, 31: {17: (1, {'@': 30}), 0: (1, {'@': 30}), 1: (1, {'@': 30}), 2: (1, {'@': 30}), 3: (1, {'@': 30})}, 32: {7: (0, 9), 8: (0, 18), 12: (0, 22), 9: (0, 0), 14: (0, 33), 10: (0, 1), 4: (0, 31), 15: (0, 24), 5: (0, 5), 11: (0, 29), 13: (0, 7)}, 33: {0: (1, {'@': 31}), 1: (1, {'@': 31}), 2: (1, {'@': 31}), 3: (1, {'@': 31})}}, 'start_states': {'start': 27}, 'end_states': {'start': 30}}, 'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}, {'@': 10}, {'@': 11}], 'ignore': ['WS'], 'g_regex_flags': 0, 'use_bytes': False, '__type__': 'LexerConf'}, 'start': ['start'], '__type__': 'LALR_ContextualLexer'}, 'rules': [{'@': 19}, {'@': 31}, {'@': 17}, {'@': 15}, {'@': 12}, {'@': 26}, {'@': 29}, {'@': 13}, {'@': 22}, {'@': 14}, {'@': 23}, {'@': 28}, {'@': 21}, {'@': 16}, {'@': 24}, {'@': 30}, {'@': 18}, {'@': 20}, {'@': 27}, {'@': 25}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': None, 'ambiguity': 'auto', 'regex': False, 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0, 'use_bytes': False}, '__type__': 'Lark'} ) MEMO = ( -{0: {'priority': 1, 'pattern': {'__type__': 'PatternRE', '_width': [2, 4294967295], 'flags': [], 'value': u'\\".*?(? 
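For orientation after the generated tables: the regenerated `json_parser.py` is meant to be used without installing Lark at all. A sketch, assuming the module is on the import path; `Lark_StandAlone` (and its `transformer` keyword, working since PATCH 21) follows the conventions exercised in `tests/test_tools.py`:

```python
from json_parser import Lark_StandAlone

parser = Lark_StandAlone()
tree = parser.parse('{"key": [1, 2, null]}')
print(tree.pretty())
```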
Date: Fri, 14 Aug 2020 16:17:26 +0300 Subject: [PATCH 24/25] Adjustments to logging PR --- lark/__init__.py | 2 +- lark/common.py | 7 --- lark/exceptions.py | 11 ++-- lark/lark.py | 4 +- lark/parsers/earley.py | 2 +- lark/parsers/lalr_analysis.py | 5 +- lark/utils.py | 96 ++++++++++++++++++----------------- 7 files changed, 61 insertions(+), 66 deletions(-) diff --git a/lark/__init__.py b/lark/__init__.py index 9bd88b0..1b5e7e3 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -1,4 +1,4 @@ -from .common import logger +from .utils import logger from .tree import Tree from .visitors import Transformer, Visitor, v_args, Discard from .visitors import InlineTransformer, inline_args # XXX Deprecated diff --git a/lark/common.py b/lark/common.py index b333dcb..714399a 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,13 +1,6 @@ -import logging from .utils import Serialize from .lexer import TerminalDef -logger = logging.getLogger("lark") -logger.addHandler(logging.StreamHandler()) -# Set to highest level, since we have some warnings amongst the code -# By default, we should not output any log messages -logger.setLevel(logging.CRITICAL) - ###{standalone class LexerConf(Serialize): diff --git a/lark/exceptions.py b/lark/exceptions.py index d1b956d..9d2d8dc 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,7 +1,6 @@ -from .utils import STRING_TYPE +from .utils import STRING_TYPE, logger ###{standalone -import logging class LarkError(Exception): @@ -62,24 +61,24 @@ class UnexpectedInput(LarkError): except UnexpectedInput as ut: if ut.state == self.state: if use_accepts and ut.accepts != self.accepts: - logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % (self.state, self.accepts, ut.accepts, i, j)) continue try: if ut.token == self.token: # Try exact match first - logging.debug("Exact Match at example [%s][%s]" % (i, j)) + logger.debug("Exact Match at example [%s][%s]" % (i, j)) return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: - logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) + logger.debug("Token Type Fallback at example [%s][%s]" % (i, j)) candidate = label, True except AttributeError: pass if not candidate[0]: - logging.debug("Same State match at example [%s][%s]" % (i, j)) + logger.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False return candidate[0] diff --git a/lark/lark.py b/lark/lark.py index ddea2d6..9a4e001 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -4,10 +4,10 @@ import sys, os, pickle, hashlib from io import open -from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii +from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger from .load_grammar import load_grammar from .tree import Tree -from .common import LexerConf, ParserConf, logger +from .common import LexerConf, ParserConf from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken from .parse_tree_builder import ParseTreeBuilder diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index bf099e6..098639d 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -14,7 +14,7 @@ from collections import deque from ..visitors import Transformer_InPlace, v_args from ..exceptions import UnexpectedEOF, UnexpectedToken -from ..common import logger +from ..utils import logger from 
.grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item, TransitiveItem diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 861941f..7a94b4d 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -6,11 +6,10 @@ For now, shift/reduce conflicts are automatically resolved as shifts. # Author: Erez Shinan (2017) # Email : erezshin@gmail.com -from collections import defaultdict, deque +from collections import defaultdict -from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator +from ..utils import classify, classify_bool, bfs, fzset, Enumerator, logger from ..exceptions import GrammarError -from ..common import logger from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet from ..grammar import Rule diff --git a/lark/utils.py b/lark/utils.py index c70b947..0c41e6b 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -4,51 +4,15 @@ from functools import reduce from ast import literal_eval from collections import deque -class fzset(frozenset): - def __repr__(self): - return '{%s}' % ', '.join(map(repr, self)) - - -def classify_bool(seq, pred): - true_elems = [] - false_elems = [] - - for elem in seq: - if pred(elem): - true_elems.append(elem) - else: - false_elems.append(elem) - - return true_elems, false_elems - - - -def bfs(initial, expand): - open_q = deque(list(initial)) - visited = set(open_q) - while open_q: - node = open_q.popleft() - yield node - for next_node in expand(node): - if next_node not in visited: - visited.add(next_node) - open_q.append(next_node) - - +###{standalone +import logging +logger = logging.getLogger("lark") +logger.addHandler(logging.StreamHandler()) +# Set to highest level, since we have some warnings amongst the code +# By default, we should not output any log messages +logger.setLevel(logging.CRITICAL) -def _serialize(value, memo): - if isinstance(value, Serialize): - return value.serialize(memo) - elif isinstance(value, list): - return [_serialize(elem, memo) for elem in value] - elif isinstance(value, frozenset): - return list(value) # TODO reversible? 
- elif isinstance(value, dict): - return {key:_serialize(elem, memo) for key, elem in value.items()} - return value - -###{standalone def classify(seq, key=None, value=None): d = {} for item in seq: @@ -302,13 +266,11 @@ def combine_alternatives(lists): return reduce(lambda a,b: [i+[j] for i in a for j in b], lists[1:], init) - class FS: open = open exists = os.path.exists - def isascii(s): """ str.isascii only exists in python3.7+ """ try: @@ -318,4 +280,46 @@ def isascii(s): s.encode('ascii') return True except (UnicodeDecodeError, UnicodeEncodeError): - return False \ No newline at end of file + return False + + +class fzset(frozenset): + def __repr__(self): + return '{%s}' % ', '.join(map(repr, self)) + + +def classify_bool(seq, pred): + true_elems = [] + false_elems = [] + + for elem in seq: + if pred(elem): + true_elems.append(elem) + else: + false_elems.append(elem) + + return true_elems, false_elems + + +def bfs(initial, expand): + open_q = deque(list(initial)) + visited = set(open_q) + while open_q: + node = open_q.popleft() + yield node + for next_node in expand(node): + if next_node not in visited: + visited.add(next_node) + open_q.append(next_node) + + +def _serialize(value, memo): + if isinstance(value, Serialize): + return value.serialize(memo) + elif isinstance(value, list): + return [_serialize(elem, memo) for elem in value] + elif isinstance(value, frozenset): + return list(value) # TODO reversible? + elif isinstance(value, dict): + return {key:_serialize(elem, memo) for key, elem in value.items()} + return value \ No newline at end of file From 39fb4c0f3e2c1c24ceeb4de29d6904a957eaaaf1 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 14 Aug 2020 16:34:51 +0300 Subject: [PATCH 25/25] Bugfix and warn on ambiguous intermediate nodes, based on PR #651 --- lark/parsers/earley_forest.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index c8b4f25..4ed75d9 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -13,6 +13,7 @@ from collections import deque from operator import attrgetter from importlib import import_module +from ..utils import logger from ..tree import Tree from ..exceptions import ParseError @@ -328,10 +329,17 @@ class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor): self.output_stack[-1].children.append(node) def visit_symbol_node_in(self, node): - if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority): - self.forest_sum_visitor.visit(node) - if not node.is_intermediate and node.is_ambiguous: - self.output_stack.append(Tree('_ambig', [])) + if node.is_ambiguous: + if self.forest_sum_visitor and isinf(node.priority): + self.forest_sum_visitor.visit(node) + if node.is_intermediate: + # TODO Support ambiguous intermediate nodes! + logger.warning("Ambiguous intermediate node in the SPPF: %s. " + "Lark does not currently process these ambiguities; resolving with the first derivation.", node) + return next(iter(node.children)) + else: + self.output_stack.append(Tree('_ambig', [])) + return iter(node.children) def visit_symbol_node_out(self, node):
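The new warning fires while an ambiguous SPPF is converted to a tree, which only happens under the Earley parser with `ambiguity='explicit'`; ambiguous intermediate nodes are now resolved with their first derivation instead of mis-building the tree. A sketch of the mode involved (whether a given ambiguity lands on a symbol node or an intermediate node depends on the grammar):

```python
from lark import Lark

# Deliberately ambiguous: "xxx" can be read as one+two or two+one.
parser = Lark(r"""
start: a a
a: "x"     -> one
 | "x" "x" -> two
""", ambiguity='explicit')

print(parser.parse("xxx").pretty())  # the tree contains a Tree('_ambig', ...) node
```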