| @@ -9,7 +9,7 @@ class LexError(Exception): | |||||
| pass | pass | ||||
| class UnexpectedInput(LexError): | class UnexpectedInput(LexError): | ||||
| def __init__(self, seq, lex_pos, line, column): | |||||
| def __init__(self, seq, lex_pos, line, column, allowed=None): | |||||
| context = seq[lex_pos:lex_pos+5] | context = seq[lex_pos:lex_pos+5] | ||||
| message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line) | message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line) | ||||
| @@ -18,6 +18,7 @@ class UnexpectedInput(LexError): | |||||
| self.line = line | self.line = line | ||||
| self.column = column | self.column = column | ||||
| self.context = context | self.context = context | ||||
| self.allowed = allowed | |||||
| class Token(Str): | class Token(Str): | ||||
| def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): | def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): | ||||
| @@ -238,7 +239,6 @@ class ContextualLexer: | |||||
| break | break | ||||
| else: | else: | ||||
| if lex_pos < len(stream): | if lex_pos < len(stream): | ||||
| print("Allowed tokens:", lexer.tokens) | |||||
| raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos) | |||||
| raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens) | |||||
| break | break | ||||
| @@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer): | |||||
| def _rfind(s, choices): | def _rfind(s, choices): | ||||
| return max(s.rfind(c) for c in choices) | return max(s.rfind(c) for c in choices) | ||||
| def _fix_escaping(s): | |||||
| s = s.replace('\\"', '"') | |||||
| w = '' | |||||
| i = iter(s) | |||||
| for n in i: | |||||
| w += n | |||||
| if n == '\\': | |||||
| n2 = next(i) | |||||
| if n2 == '\\': | |||||
| w += '\\\\' | |||||
| elif n2 not in 'unftr': | |||||
| w += '\\' | |||||
| w += n2 | |||||
| to_eval = "u'''%s'''" % w | |||||
| try: | |||||
| s = literal_eval(to_eval) | |||||
| except SyntaxError as e: | |||||
| raise ValueError(v, e) | |||||
| return s | |||||
| def _literal_to_pattern(literal): | def _literal_to_pattern(literal): | ||||
| v = literal.value | v = literal.value | ||||
| flag_start = _rfind(v, '/"')+1 | flag_start = _rfind(v, '/"')+1 | ||||
| @@ -300,13 +325,12 @@ def _literal_to_pattern(literal): | |||||
| v = v[:flag_start] | v = v[:flag_start] | ||||
| assert v[0] == v[-1] and v[0] in '"/' | assert v[0] == v[-1] and v[0] in '"/' | ||||
| x = v[1:-1] | x = v[1:-1] | ||||
| x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x) | |||||
| x = x.replace("'", r"\'") | |||||
| to_eval = "u'''%s'''" % x | |||||
| try: | |||||
| s = literal_eval(to_eval) | |||||
| except SyntaxError as e: | |||||
| raise ValueError(v, e) | |||||
| s = _fix_escaping(x) | |||||
| if v[0] == '"': | |||||
| s = s.replace('\\\\', '\\') | |||||
| return { 'STRING': PatternStr, | return { 'STRING': PatternStr, | ||||
| 'REGEXP': PatternRE }[literal.type](s, flags or None) | 'REGEXP': PatternRE }[literal.type](s, flags or None) | ||||
| @@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO) | |||||
| from lark.lark import Lark | from lark.lark import Lark | ||||
| from lark.common import GrammarError, ParseError | from lark.common import GrammarError, ParseError | ||||
| from lark.lexer import LexError | |||||
| from lark.lexer import LexError, UnexpectedInput | |||||
| from lark.tree import Tree, Transformer | from lark.tree import Tree, Transformer | ||||
| __path__ = os.path.dirname(__file__) | __path__ = os.path.dirname(__file__) | ||||
| @@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| """) | """) | ||||
| x = g.parse(r'\a') | x = g.parse(r'\a') | ||||
| g = _Lark(r"""start: /\\\\/ /a/ | |||||
| g = _Lark(r"""start: /\\/ /a/ | |||||
| """) | """) | ||||
| x = g.parse(r'\a') | x = g.parse(r'\a') | ||||
| @@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(tree.children, ['1']) | self.assertEqual(tree.children, ['1']) | ||||
@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self):
    """Backslash escaping in terminal regexps and strings behaves as written."""
    # TODO Make dynamic parser raise UnexpectedInput if nothing scans?
    expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput

    g = _Lark("start: /[ab]/")
    g.parse('a')
    g.parse('b')
    self.assertRaises(expected_error, g.parse, 'c')

    # \w is a regex word-class; \\w is a literal backslash followed by 'w'.
    _Lark(r'start: /\w/').parse('a')
    g = _Lark(r'start: /\\w/')
    self.assertRaises(expected_error, g.parse, 'a')
    g.parse(r'\w')

    # Each (grammar, accepted input) pair below must parse without error.
    for grammar, text in [
        (r'start: /\[/', '['),
        (r'start: /\//', '/'),
        (r'start: /\\/', '\\'),
        (r'start: /\[ab]/', '[ab]'),
        (r'start: /\\[ab]/', '\\a'),
        (r'start: /\t/', '\t'),
        (r'start: /\\t/', '\\t'),
        (r'start: /\\\t/', '\\\t'),
        (r'start: "\t"', '\t'),
        (r'start: "\\t"', '\\t'),
        (r'start: "\\\t"', '\\\t'),
    ]:
        _Lark(grammar).parse(text)
| _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | ||||
| _TestParser.__name__ = _NAME | _TestParser.__name__ = _NAME | ||||