@@ -23,6 +23,7 @@ class LarkOptions:
     transformer: Optional[Transformer]
     postlex: Optional[PostLex]
     ambiguity: str
+    regex: bool
     debug: bool
     keep_all_tokens: bool
     propagate_positions: bool
@@ -48,6 +49,7 @@ class Lark:
         transformer: Optional[Transformer] = None,
         postlex: Optional[PostLex] = None,
         ambiguity: Literal["explicit", "resolve"] = "resolve",
+        regex: bool = False,
         debug: bool = False,
         keep_all_tokens: bool = False,
         propagate_positions: bool = False,
@@ -14,6 +14,12 @@ from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend
 from .grammar import Rule
 
+import re
+try:
+    import regex
+except ImportError:
+    regex = None
+
 ###{standalone
 
 class LarkOptions(Serialize):
@@ -34,6 +40,7 @@ class LarkOptions(Serialize):
             When `False`, `[]` behaves like the `?` operator,
             and returns no value at all.
             (default=`False`. Recommended to set to `True`)
+        regex - When True, uses the `regex` module instead of the stdlib `re`.
        cache - Cache the results of the Lark grammar analysis, for x2 to x3 faster loading.
                LALR only for now.
            When `False`, does nothing (default)
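
(Note: a minimal usage sketch of the new option, not part of the diff. The grammar below is illustrative and assumes the third-party `regex` package is installed.)

    from lark import Lark

    # Unicode property escapes such as \p{Lu} are rejected by the stdlib
    # `re` module, so this grammar only compiles when `regex=True` makes
    # Lark use the third-party `regex` module instead.
    parser = Lark(r"""
        start: WORD
        WORD: /\p{Lu}+/
    """, regex=True)

    print(parser.parse("HELLO"))  # Tree('start', [Token('WORD', 'HELLO')])
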
| @@ -92,6 +99,7 @@ class LarkOptions(Serialize): | |||||
| 'start': 'start', | 'start': 'start', | ||||
| 'priority': 'auto', | 'priority': 'auto', | ||||
| 'ambiguity': 'auto', | 'ambiguity': 'auto', | ||||
| 'regex': False, | |||||
| 'propagate_positions': False, | 'propagate_positions': False, | ||||
| 'lexer_callbacks': {}, | 'lexer_callbacks': {}, | ||||
| 'maybe_placeholders': False, | 'maybe_placeholders': False, | ||||
| @@ -154,6 +162,16 @@ class Lark(Serialize): | |||||
| self.options = LarkOptions(options) | self.options = LarkOptions(options) | ||||
| # Set regex or re module | |||||
| use_regex = self.options.regex | |||||
| if use_regex: | |||||
| if regex: | |||||
| self.re = regex | |||||
| else: | |||||
| raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') | |||||
| else: | |||||
| self.re = re | |||||
| # Some, but not all file-like objects have a 'name' attribute | # Some, but not all file-like objects have a 'name' attribute | ||||
| try: | try: | ||||
| self.source = grammar.name | self.source = grammar.name | ||||
| @@ -224,7 +242,7 @@ class Lark(Serialize): | |||||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | ||||
| # Parse the grammar file and compose the grammars (TODO) | # Parse the grammar file and compose the grammars (TODO) | ||||
| self.grammar = load_grammar(grammar, self.source) | |||||
| self.grammar = load_grammar(grammar, self.source, self.re) | |||||
| # Compile the EBNF grammar into BNF | # Compile the EBNF grammar into BNF | ||||
| self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) | self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) | ||||
| @@ -285,7 +303,7 @@ class Lark(Serialize): | |||||
| def _build_parser(self): | def _build_parser(self): | ||||
| self._prepare_callbacks() | self._prepare_callbacks() | ||||
| parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | ||||
| return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | |||||
| return self.parser_class(self.lexer_conf, parser_conf, self.re, options=self.options) | |||||
| def save(self, f): | def save(self, f): | ||||
| data, m = self.memo_serialize([TerminalDef, Rule]) | data, m = self.memo_serialize([TerminalDef, Rule]) | ||||
| @@ -312,10 +330,11 @@ class Lark(Serialize): | |||||
| if postlex is not None: | if postlex is not None: | ||||
| options['postlex'] = postlex | options['postlex'] = postlex | ||||
| self.options = LarkOptions.deserialize(options, memo) | self.options = LarkOptions.deserialize(options, memo) | ||||
| self.re = regex if self.options.regex else re | |||||
| self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | ||||
| self.source = '<deserialized>' | self.source = '<deserialized>' | ||||
| self._prepare_callbacks() | self._prepare_callbacks() | ||||
| self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex) | |||||
| self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex, self.re) | |||||
| return self | return self | ||||
| @classmethod | @classmethod | ||||
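
(Note: how the new deserialize path is exercised, not part of the diff. A sketch of the save/load round trip, assuming a LALR parser since saving is LALR-only at this point.)

    import io
    from lark import Lark

    parser = Lark('start: "a"+', parser='lalr', regex=True)

    buf = io.BytesIO()
    parser.save(buf)           # options (including 'regex') are serialized too
    buf.seek(0)

    restored = Lark.load(buf)  # self.re is re-selected from options['regex']
    restored.parse('aaa')
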
@@ -1,9 +1,10 @@
 ## Lexer Implementation
 
+import re
 try:
-    import regex as re
+    import regex
 except ImportError:
-    import re
+    regex = None
 
 from .utils import Str, classify, get_regexp_width, Py36, Serialize
 from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
@@ -233,7 +234,7 @@ class CallChain:
 
 
-def _create_unless(terminals, g_regex_flags):
+def _create_unless(terminals, g_regex_flags, re_):
     tokens_by_type = classify(terminals, lambda t: type(t.pattern))
     assert len(tokens_by_type) <= 2, tokens_by_type.keys()
     embedded_strs = set()
@@ -244,7 +245,7 @@ def _create_unless(terminals, g_regex_flags):
             if strtok.priority > retok.priority:
                 continue
             s = strtok.pattern.value
-            m = re.match(retok.pattern.to_regexp(), s, g_regex_flags)
+            m = re_.match(retok.pattern.to_regexp(), s, g_regex_flags)
             if m and m.group(0) == s:
                 unless.append(strtok)
                 if strtok.pattern.flags <= retok.pattern.flags:
@@ -297,16 +298,17 @@ class Lexer(object):
 
 class TraditionalLexer(Lexer):
 
-    def __init__(self, terminals, ignore=(), user_callbacks={}, g_regex_flags=0):
+    def __init__(self, terminals, re_, ignore=(), user_callbacks={}, g_regex_flags=0):
         assert all(isinstance(t, TerminalDef) for t in terminals), terminals
 
         terminals = list(terminals)
+        self.re = re_
 
         # Sanitization
         for t in terminals:
             try:
-                re.compile(t.pattern.to_regexp(), g_regex_flags)
-            except re.error:
+                self.re.compile(t.pattern.to_regexp(), g_regex_flags)
+            except self.re.error:
                 raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
 
             if t.pattern.min_width == 0:
@@ -324,7 +326,7 @@ class TraditionalLexer(Lexer):
         self.build(g_regex_flags)
 
     def build(self, g_regex_flags=0):
-        terminals, self.callback = _create_unless(self.terminals, g_regex_flags)
+        terminals, self.callback = _create_unless(self.terminals, g_regex_flags, re_=self.re)
         assert all(self.callback.values())
 
         for type_, f in self.user_callbacks.items():
@@ -350,7 +352,8 @@ class TraditionalLexer(Lexer):
 
 class ContextualLexer(Lexer):
 
-    def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}, g_regex_flags=0):
+    def __init__(self, terminals, states, re_, ignore=(), always_accept=(), user_callbacks={}, g_regex_flags=0):
+        self.re = re_
         tokens_by_name = {}
         for t in terminals:
             assert t.name not in tokens_by_name, t
@@ -365,12 +368,12 @@ class ContextualLexer(Lexer):
             except KeyError:
                 accepts = set(accepts) | set(ignore) | set(always_accept)
                 state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name]
-                lexer = TraditionalLexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags)
+                lexer = TraditionalLexer(state_tokens, re_=self.re, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags)
                 lexer_by_tokens[key] = lexer
 
             self.lexers[state] = lexer
 
-        self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags)
+        self.root_lexer = TraditionalLexer(terminals, re_=self.re, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags)
 
     def lex(self, stream, get_parser_state):
         parser_state = get_parser_state()
@@ -616,7 +616,7 @@ class Grammar:
 
 _imported_grammars = {}
-def import_grammar(grammar_path, base_paths=[]):
+def import_grammar(grammar_path, re_, base_paths=[]):
     if grammar_path not in _imported_grammars:
         import_paths = base_paths + IMPORT_PATHS
         for import_path in import_paths:
@@ -624,7 +624,7 @@ def import_grammar(grammar_path, base_paths=[]):
             joined_path = os.path.join(import_path, grammar_path)
             with open(joined_path, encoding='utf8') as f:
                 text = f.read()
-            grammar = load_grammar(text, joined_path)
+            grammar = load_grammar(text, joined_path, re_)
             _imported_grammars[grammar_path] = grammar
             break
         else:
@@ -755,7 +755,8 @@ def _find_used_symbols(tree):
             for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
 
 class GrammarLoader:
-    def __init__(self):
+    def __init__(self, re_):
+        self.re = re_
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
 
         rules = [options_from_rule(name, None, x) for name, x in RULES.items()]
@@ -764,7 +765,7 @@ class GrammarLoader:
         lexer_conf = LexerConf(terminals, ['WS', 'COMMENT'])
         parser_conf = ParserConf(rules, callback, ['start'])
-        self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf)
+        self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf, re_)
 
         self.canonize_tree = CanonizeTree()
@@ -862,7 +863,7 @@ class GrammarLoader:
         # import grammars
         for dotted_path, (base_paths, aliases) in imports.items():
             grammar_path = os.path.join(*dotted_path) + EXT
-            g = import_grammar(grammar_path, base_paths=base_paths)
+            g = import_grammar(grammar_path, self.re, base_paths=base_paths)
             new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
 
             term_defs += new_td
@@ -942,4 +943,5 @@ class GrammarLoader:
 
 
-load_grammar = GrammarLoader().load_grammar
+def load_grammar(grammar, source, re_):
+    return GrammarLoader(re_).load_grammar(grammar, source)
@@ -1,7 +1,3 @@
-try:
-    import regex as re
-except ImportError:
-    import re
 from functools import partial
 
 from .utils import get_regexp_width, Serialize
@@ -66,14 +62,16 @@ class WithLexer(_ParserFrontend):
     __serialize_fields__ = 'parser', 'lexer_conf', 'start'
     __serialize_namespace__ = LexerConf,
 
-    def __init__(self, lexer_conf, parser_conf, options=None):
+    def __init__(self, lexer_conf, parser_conf, re_, options=None):
         self.lexer_conf = lexer_conf
         self.start = parser_conf.start
         self.postlex = lexer_conf.postlex
+        self.re = re_
 
     @classmethod
-    def deserialize(cls, data, memo, callbacks, postlex):
+    def deserialize(cls, data, memo, callbacks, postlex, re_):
         inst = super(WithLexer, cls).deserialize(data, memo)
+        inst.re = re_
         inst.postlex = postlex
         inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
         inst.init_lexer()
@@ -91,13 +89,14 @@ class WithLexer(_ParserFrontend):
         return self._parse(token_stream, start)
 
     def init_traditional_lexer(self):
-        self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags)
+        self.lexer = TraditionalLexer(self.lexer_conf.tokens, re_=self.re, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags)
 
 class LALR_WithLexer(WithLexer):
-    def __init__(self, lexer_conf, parser_conf, options=None):
+    def __init__(self, lexer_conf, parser_conf, re_, options=None):
         debug = options.debug if options else False
+        self.re = re_
         self.parser = LALR_Parser(parser_conf, debug=debug)
-        WithLexer.__init__(self, lexer_conf, parser_conf, options)
+        WithLexer.__init__(self, lexer_conf, parser_conf, re_, options)
 
         self.init_lexer()
@@ -113,6 +112,7 @@ class LALR_ContextualLexer(LALR_WithLexer):
         states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
         always_accept = self.postlex.always_accept if self.postlex else ()
         self.lexer = ContextualLexer(self.lexer_conf.tokens, states,
+                                     re_=self.re,
                                      ignore=self.lexer_conf.ignore,
                                      always_accept=always_accept,
                                      user_callbacks=self.lexer_conf.callbacks,
@@ -129,11 +129,11 @@ class LALR_ContextualLexer(LALR_WithLexer):
 ###}
 
 class LALR_CustomLexer(LALR_WithLexer):
-    def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None):
-        self.lexer = lexer_cls(lexer_conf)
+    def __init__(self, lexer_cls, lexer_conf, parser_conf, re_, options=None):
+        self.lexer = lexer_cls(lexer_conf, re_=re_)
         debug = options.debug if options else False
         self.parser = LALR_Parser(parser_conf, debug=debug)
-        WithLexer.__init__(self, lexer_conf, parser_conf, options)
+        WithLexer.__init__(self, lexer_conf, parser_conf, re_, options)
 
 
 def tokenize_text(text):
@@ -146,8 +146,8 @@ def tokenize_text(text):
             yield Token('CHAR', ch, line=line, column=i - col_start_pos)
 
 class Earley(WithLexer):
-    def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf, parser_conf, options)
+    def __init__(self, lexer_conf, parser_conf, re_, options=None):
+        WithLexer.__init__(self, lexer_conf, parser_conf, re_, options)
         self.init_traditional_lexer()
 
         resolve_ambiguity = options.ambiguity == 'resolve'
@@ -159,7 +159,9 @@ class Earley(WithLexer):
 
 class XEarley(_ParserFrontend):
-    def __init__(self, lexer_conf, parser_conf, options=None, **kw):
+    def __init__(self, lexer_conf, parser_conf, re_, options=None, **kw):
+        self.re = re_
+
         self.token_by_name = {t.name:t for t in lexer_conf.tokens}
         self.start = parser_conf.start
@@ -191,7 +193,7 @@ class XEarley(_ParserFrontend):
                 if width == 0:
                     raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t)
 
-                self.regexps[t.name] = re.compile(regexp, lexer_conf.g_regex_flags)
+                self.regexps[t.name] = self.re.compile(regexp, lexer_conf.g_regex_flags)
 
     def parse(self, text, start):
         return self._parse(text, start)
@@ -204,8 +206,8 @@ class XEarley_CompleteLex(XEarley):
 
 class CYK(WithLexer):
 
-    def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf, parser_conf, options)
+    def __init__(self, lexer_conf, parser_conf, re_, options=None):
+        WithLexer.__init__(self, lexer_conf, parser_conf, re_, options)
         self.init_traditional_lexer()
 
         self._analysis = GrammarAnalyzer(parser_conf)
@@ -551,8 +551,8 @@ class CustomLexer(Lexer):
        Purpose of this custom lexer is to test the integration,
        so it uses the traditionalparser as implementation without custom lexing behaviour.
     """
-    def __init__(self, lexer_conf):
-        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags)
+    def __init__(self, lexer_conf, re_):
+        self.lexer = TraditionalLexer(lexer_conf.tokens, re_, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags)
     def lex(self, *args, **kwargs):
         return self.lexer.lex(*args, **kwargs)
@@ -17,7 +17,7 @@ class TestRegex(unittest.TestCase):
             NAME: ID_START ID_CONTINUE*
            ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
            ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/
-        """)
+        """, regex=True)
 
         self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
@@ -26,7 +26,7 @@ class TestRegex(unittest.TestCase):
         g = Lark(r"""
                 ?start: NAME
                 NAME: /[\w]+/
-        """)
+        """, regex=True)
 
         self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')
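
(Note: background for these tests, not part of the diff. The stdlib `re` module rejects Unicode property escapes, while the third-party `regex` module supports them; that difference is what `regex=True` exposes. A standalone check:)

    import re
    import regex

    try:
        re.compile(r'\p{Lu}+')
    except re.error as err:
        print('stdlib re:', err)  # "bad escape \p" on Python 3.7+

    print(bool(regex.fullmatch(r'\p{Lu}+', 'ABC')))  # True
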