diff --git a/lark/lexer.py b/lark/lexer.py
index 3237b02..7e2c72a 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -71,20 +71,25 @@ class TerminalDef(object):
 class Token(Str):
     __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column')
 
-    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
-        self = super(Token, cls).__new__(cls, value)
+    def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None):
+        try:
+            self = super(Token, cls).__new__(cls, value)
+        except UnicodeDecodeError:
+            value = value.decode('latin1')
+            self = super(Token, cls).__new__(cls, value)
+
         self.type = type_
         self.pos_in_stream = pos_in_stream
         self.value = value
         self.line = line
         self.column = column
-        self.end_line = None
-        self.end_column = None
+        self.end_line = end_line
+        self.end_column = end_column
         return self
 
     @classmethod
     def new_borrow_pos(cls, type_, value, borrow_t):
-        return cls(type_, value, borrow_t.pos_in_stream, line=borrow_t.line, column=borrow_t.column)
+        return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column)
 
     def __reduce__(self):
         return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, ))
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index bbf6fa3..2d5d547 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -346,7 +346,7 @@ def _fix_escaping(s):
             n2 = next(i)
             if n2 == '\\':
                 w += '\\\\'
-            elif n2 not in 'unftr':
+            elif n2 not in 'uxnftr':
                 w += '\\'
             w += n2
     w = w.replace('\\"', '"').replace("'", "\\'")
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 54fb288..68514a1 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -448,6 +448,13 @@ def _make_parser_test(LEXER, PARSER):
                         """)
             g.parse(u'\xa3\u0101\u00a3\u0203\n')
 
+        def test_hex_escape(self):
+            g = _Lark(r"""start: A B C
+                          A: "\x01"
+                          B: /\x02/
+                          C: "\xABCD"
+                          """)
+            g.parse('\x01\x02\xABCD')
 
         @unittest.skipIf(PARSER == 'cyk', "Takes forever")
         def test_stack_for_ebnf(self):
@@ -1363,4 +1370,3 @@ for _LEXER in ('dynamic', 'dynamic_complete'):
 
 if __name__ == '__main__':
     unittest.main()
-