Small improvements for debug info

5 years ago · 28e0a86f38
--- a/lark-stubs/exceptions.pyi
+++ b/lark-stubs/exceptions.pyi
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 from typing import Dict, Iterable, Callable, Union
 from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple
 from .tree import Tree
 from .lexer import Token
@@ -21,6 +21,9 @@ class LexError(LarkError):
    pass
 T = TypeVar('T')
 class UnexpectedInput(LarkError):
    pos_in_stream: int
@@ -28,10 +31,12 @@ class UnexpectedInput(LarkError):
        ...
    def match_examples(
        self,
        parse_fn: Callable[[str], Tree],
        examples: Dict[str, Iterable[str]]
    ):
            self,
            parse_fn: Callable[[str], Tree],
            examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
            token_type_match_fallback: bool = False,
            print_debug_info: bool = True
    ) -> T:
        ...
--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -37,34 +37,43 @@ class UnexpectedInput(LarkError):
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace")
    def match_examples(self, parse_fn, examples, token_type_match_fallback=False):
    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True):
        """ Given a parser instance and a dictionary mapping some label with
            some malformed syntax examples, it'll return the label for the
            example that best matches the current error.
        """
        assert self.state is not None, "Not supported for this exception"
        if isinstance(examples, dict):
            examples = examples.items()
        candidate = (None, False)
        for label, example in examples.items():
        for i, (label, example) in enumerate(examples):
            assert not isinstance(example, STRING_TYPE)
            for malformed in example:
            for j, malformed in enumerate(example):
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        try:
                            if ut.token == self.token:  # Try exact match first
                                if print_debug_info:
                                    print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state))
                                return label
                            if token_type_match_fallback:
                                # Fallback to token types match
                                if (ut.token.type == self.token.type) and not candidate[-1]:
                                    if print_debug_info:
                                        print("Token Type Fallback at %d, with example %d" % (i, j))
                                    candidate = label, True
                        except AttributeError:
                            pass
                        if not candidate[0]:
                            if print_debug_info:
                                print("Defaulted at %d, with example %d" % (i, j))
                            candidate = label, False
        return candidate[0]
--- a/lark/parsers/lalr_puppet.py
+++ b/lark/parsers/lalr_puppet.py
@@ -16,7 +16,7 @@ class ParserPuppet:
        self.result = None
    def feed_token(self, token):
        """Advance the parser state, as if it just recieved `token` from the lexer
        """Advance the parser state, as if it just received `token` from the lexer
        """
        end_state = self.parser.parse_table.end_states[self._start]
@@ -66,9 +66,9 @@ class ParserPuppet:
            self._set_state,
        )
    def pretty():
    def pretty(self):
        print("Puppet choices:")
        for k, v in self.choices.items():
        for k, v in self.choices().items():
            print('\t-', k, '->', v)
        print('stack size:', len(self._state_stack))