| @@ -163,22 +163,14 @@ yield_arg: "from" test | testlist | |||||
| number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER | number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER | ||||
| string: STRING | LONG_STRING | string: STRING | LONG_STRING | ||||
| // Tokens | |||||
| NAME: /[a-zA-Z_]\w*/ | |||||
| COMMENT: /#[^\n]*/ | |||||
| _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ | |||||
| // Import terminals from standard library (grammars/python.lark) | |||||
| %import python (NAME, COMMENT, STRING, LONG_STRING) | |||||
| %import python (DEC_NUMBER, HEX_NUMBER, OCT_NUMBER, BIN_NUMBER, FLOAT_NUMBER, IMAG_NUMBER) | |||||
| STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i | |||||
| LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is | |||||
| // Other terminals | |||||
| DEC_NUMBER: /0|[1-9]\d*/i | |||||
| HEX_NUMBER.2: /0x[\da-f]*/i | |||||
| OCT_NUMBER.2: /0o[0-7]*/i | |||||
| BIN_NUMBER.2 : /0b[0-1]*/i | |||||
| FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i | |||||
| IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i | |||||
| _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ | |||||
| %ignore /[\t \f]+/ // WS | %ignore /[\t \f]+/ // WS | ||||
| %ignore /\\[\t \f]*\r?\n/ // LINE_CONT | %ignore /\\[\t \f]*\r?\n/ // LINE_CONT | ||||
| @@ -1,3 +1,6 @@ | |||||
| // Basic terminals for common use | |||||
| // | // | ||||
| // Numbers | // Numbers | ||||
| // | // | ||||
| @@ -21,7 +24,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER | |||||
| // Strings | // Strings | ||||
| // | // | ||||
| _STRING_INNER: /.*?/ | _STRING_INNER: /.*?/ | ||||
| _STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/ | |||||
| _STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/ | |||||
| ESCAPED_STRING : "\"" _STRING_ESC_INNER "\"" | ESCAPED_STRING : "\"" _STRING_ESC_INNER "\"" | ||||
| @@ -48,3 +51,9 @@ CR : /\r/ | |||||
| LF : /\n/ | LF : /\n/ | ||||
| NEWLINE: (CR? LF)+ | NEWLINE: (CR? LF)+ | ||||
| // Comments | |||||
| SH_COMMENT: /#[^\n]*/ | |||||
| CPP_COMMENT: /\/\/[^\n]*/ | |||||
| C_COMMENT: "/*" /.*?/s "*/" | |||||
| SQL_COMMENT: /--[^\n]*/ | |||||
| @@ -0,0 +1,19 @@ | |||||
| // Python terminals | |||||
| NAME: /[a-zA-Z_]\w*/ | |||||
| COMMENT: /#[^\n]*/ | |||||
| STRING : /[ubf]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i | |||||
| LONG_STRING: /[ubf]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is | |||||
| DEC_NUMBER: /0|[1-9]\d*/i | |||||
| HEX_NUMBER.2: /0x[\da-f]*/i | |||||
| OCT_NUMBER.2: /0o[0-7]*/i | |||||
| BIN_NUMBER.2 : /0b[0-1]*/i | |||||
| FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i | |||||
| IMAG_NUMBER.2: /\d+j/i | FLOAT_NUMBER "j"i | |||||
| // Comma-separated list (with an optional trailing comma) | |||||
| cs_list{item}: item ("," item)* ","? | |||||
| _cs_list{item}: item ("," item)* ","? | |||||
| @@ -294,8 +294,8 @@ class Lark(Serialize): | |||||
| if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: | if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: | ||||
| raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | ||||
| # Parse the grammar file and compose the grammars (TODO) | |||||
| self.grammar = load_grammar(grammar, self.source, self.options.import_paths, self.options.keep_all_tokens) | |||||
| # Parse the grammar file and compose the grammars | |||||
| self.grammar = load_grammar(grammar, self.source_path, self.options.import_paths, self.options.keep_all_tokens) | |||||
| if self.options.postlex is not None: | if self.options.postlex is not None: | ||||
| terminals_to_keep = set(self.options.postlex.always_accept) | terminals_to_keep = set(self.options.postlex.always_accept) | ||||