| @@ -4,6 +4,7 @@ | |||||
| /lark_parser.egg-info/** | /lark_parser.egg-info/** | ||||
| tags | tags | ||||
| .vscode | .vscode | ||||
| .idea | |||||
| .ropeproject | .ropeproject | ||||
| .cache | .cache | ||||
| /dist | /dist | ||||
| @@ -72,7 +72,7 @@ Lark is great at handling ambiguity. Let's parse the phrase "fruit flies like ba | |||||
|  |  | ||||
| See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples) | |||||
| See more [examples here](https://github.com/lark-parser/lark/tree/master/examples) | |||||
| @@ -95,7 +95,7 @@ See more [examples in the wiki](https://github.com/erezsh/lark/wiki/Examples) | |||||
| - Extensive test suite [](https://codecov.io/gh/erezsh/lark) | - Extensive test suite [](https://codecov.io/gh/erezsh/lark) | ||||
| - And much more! | - And much more! | ||||
| See the full list of [features in the wiki](https://github.com/erezsh/lark/wiki/Features) | |||||
| See the full list of [features here](https://lark-parser.readthedocs.io/en/latest/features/) | |||||
| ### Comparison to other libraries | ### Comparison to other libraries | ||||
| @@ -5,4 +5,4 @@ from .exceptions import ParseError, LexError, GrammarError, UnexpectedToken, Une | |||||
| from .lexer import Token | from .lexer import Token | ||||
| from .lark import Lark | from .lark import Lark | ||||
| __version__ = "0.7.2" | |||||
| __version__ = "0.7.4" | |||||
| @@ -8,7 +8,6 @@ from .exceptions import UnexpectedCharacters, LexError | |||||
| ###{standalone | ###{standalone | ||||
| class Pattern(Serialize): | class Pattern(Serialize): | ||||
| __serialize_fields__ = 'value', 'flags' | |||||
| def __init__(self, value, flags=()): | def __init__(self, value, flags=()): | ||||
| self.value = value | self.value = value | ||||
| @@ -41,6 +40,8 @@ class Pattern(Serialize): | |||||
| class PatternStr(Pattern): | class PatternStr(Pattern): | ||||
| __serialize_fields__ = 'value', 'flags' | |||||
| type = "str" | type = "str" | ||||
| def to_regexp(self): | def to_regexp(self): | ||||
| @@ -52,6 +53,8 @@ class PatternStr(Pattern): | |||||
| max_width = min_width | max_width = min_width | ||||
| class PatternRE(Pattern): | class PatternRE(Pattern): | ||||
| __serialize_fields__ = 'value', 'flags', '_width' | |||||
| type = "re" | type = "re" | ||||
| def to_regexp(self): | def to_regexp(self): | ||||
| @@ -98,7 +101,7 @@ class Token(Str): | |||||
| self.type = type_ | self.type = type_ | ||||
| self.pos_in_stream = pos_in_stream | self.pos_in_stream = pos_in_stream | ||||
| self.value = Str(value) | |||||
| self.value = value | |||||
| self.line = line | self.line = line | ||||
| self.column = column | self.column = column | ||||
| self.end_line = end_line | self.end_line = end_line | ||||
| @@ -265,13 +268,14 @@ def build_mres(terminals, match_whole=False): | |||||
| return _build_mres(terminals, len(terminals), match_whole) | return _build_mres(terminals, len(terminals), match_whole) | ||||
| def _regexp_has_newline(r): | def _regexp_has_newline(r): | ||||
| """Expressions that may indicate newlines in a regexp: | |||||
| r"""Expressions that may indicate newlines in a regexp: | |||||
| - newlines (\n) | - newlines (\n) | ||||
| - escaped newline (\\n) | - escaped newline (\\n) | ||||
| - anything but ([^...]) | - anything but ([^...]) | ||||
| - any-char (.) when the flag (?s) exists | - any-char (.) when the flag (?s) exists | ||||
| - whitespace (\s), which also matches newlines | |||||
| """ | """ | ||||
| return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r) | |||||
| return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | |||||
| class Lexer(object): | class Lexer(object): | ||||
| """Lexer interface | """Lexer interface | ||||
| @@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder | |||||
| from .parser_frontends import LALR_TraditionalLexer | from .parser_frontends import LALR_TraditionalLexer | ||||
| from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
| from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | ||||
| from .utils import classify, suppress, dedup_list | |||||
| from .utils import classify, suppress, dedup_list, Str | |||||
| from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | ||||
| from .tree import Tree, SlottedTree as ST | from .tree import Tree, SlottedTree as ST | ||||
| @@ -351,7 +351,10 @@ def _fix_escaping(s): | |||||
| for n in i: | for n in i: | ||||
| w += n | w += n | ||||
| if n == '\\': | if n == '\\': | ||||
| n2 = next(i) | |||||
| try: | |||||
| n2 = next(i) | |||||
| except StopIteration: | |||||
| raise ValueError("Literal ended unexpectedly (bad escaping): `%r`" % s) | |||||
| if n2 == '\\': | if n2 == '\\': | ||||
| w += '\\\\' | w += '\\\\' | ||||
| elif n2 not in 'uxnftr': | elif n2 not in 'uxnftr': | ||||
| @@ -451,9 +454,9 @@ class PrepareSymbols(Transformer_InPlace): | |||||
| if isinstance(v, Tree): | if isinstance(v, Tree): | ||||
| return v | return v | ||||
| elif v.type == 'RULE': | elif v.type == 'RULE': | ||||
| return NonTerminal(v.value) | |||||
| return NonTerminal(Str(v.value)) | |||||
| elif v.type == 'TERMINAL': | elif v.type == 'TERMINAL': | ||||
| return Terminal(v.value, filter_out=v.startswith('_')) | |||||
| return Terminal(Str(v.value), filter_out=v.startswith('_')) | |||||
| assert False | assert False | ||||
| def _choice_of_rules(rules): | def _choice_of_rules(rules): | ||||
| @@ -511,12 +514,12 @@ class Grammar: | |||||
| simplify_rule = SimplifyRule_Visitor() | simplify_rule = SimplifyRule_Visitor() | ||||
| compiled_rules = [] | compiled_rules = [] | ||||
| for i, rule_content in enumerate(rules): | |||||
| for rule_content in rules: | |||||
| name, tree, options = rule_content | name, tree, options = rule_content | ||||
| simplify_rule.visit(tree) | simplify_rule.visit(tree) | ||||
| expansions = rule_tree_to_text.transform(tree) | expansions = rule_tree_to_text.transform(tree) | ||||
| for expansion, alias in expansions: | |||||
| for i, (expansion, alias) in enumerate(expansions): | |||||
| if alias and name.startswith('_'): | if alias and name.startswith('_'): | ||||
| raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | ||||
| @@ -538,7 +541,7 @@ class Grammar: | |||||
| for dups in duplicates.values(): | for dups in duplicates.values(): | ||||
| if len(dups) > 1: | if len(dups) > 1: | ||||
| if dups[0].expansion: | if dups[0].expansion: | ||||
| raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates)) | |||||
| raise GrammarError("Rules defined twice: %s\n\n(Might happen due to colliding expansion of optionals: [] or ?)" % ''.join('\n * %s' % i for i in dups)) | |||||
| # Empty rule; assert all other attributes are equal | # Empty rule; assert all other attributes are equal | ||||
| assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups) | assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups) | ||||
| @@ -605,7 +608,9 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): | |||||
| _, tree, _ = imported_rules[symbol] | _, tree, _ = imported_rules[symbol] | ||||
| except KeyError: | except KeyError: | ||||
| raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) | raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) | ||||
| return tree.scan_values(lambda x: x.type in ('RULE', 'TERMINAL')) | |||||
| return _find_used_symbols(tree) | |||||
| def get_namespace_name(name): | def get_namespace_name(name): | ||||
| try: | try: | ||||
| @@ -682,6 +687,11 @@ class PrepareGrammar(Transformer_InPlace): | |||||
| return name | return name | ||||
| def _find_used_symbols(tree): | |||||
| assert tree.data == 'expansions' | |||||
| return {t for x in tree.find_data('expansion') | |||||
| for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | |||||
| class GrammarLoader: | class GrammarLoader: | ||||
| def __init__(self): | def __init__(self): | ||||
| terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | ||||
| @@ -843,9 +853,7 @@ class GrammarLoader: | |||||
| rule_names.add(name) | rule_names.add(name) | ||||
| for name, expansions, _o in rules: | for name, expansions, _o in rules: | ||||
| used_symbols = {t for x in expansions.find_data('expansion') | |||||
| for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | |||||
| for sym in used_symbols: | |||||
| for sym in _find_used_symbols(expansions): | |||||
| if sym.type == 'TERMINAL': | if sym.type == 'TERMINAL': | ||||
| if sym not in terminal_names: | if sym not in terminal_names: | ||||
| raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) | raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) | ||||
| @@ -118,7 +118,7 @@ class LALR_ContextualLexer(LALR_WithLexer): | |||||
| class LALR_CustomLexer(LALR_WithLexer): | class LALR_CustomLexer(LALR_WithLexer): | ||||
| def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None): | def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None): | ||||
| self.lexer = lexer_cls(self.lexer_conf) | |||||
| self.lexer = lexer_cls(lexer_conf) | |||||
| debug = options.debug if options else False | debug = options.debug if options else False | ||||
| self.parser = LALR_Parser(parser_conf, debug=debug) | self.parser = LALR_Parser(parser_conf, debug=debug) | ||||
| WithLexer.__init__(self, lexer_conf, parser_conf, options) | WithLexer.__init__(self, lexer_conf, parser_conf, options) | ||||
| @@ -139,7 +139,8 @@ class Earley(WithLexer): | |||||
| self.init_traditional_lexer() | self.init_traditional_lexer() | ||||
| resolve_ambiguity = options.ambiguity == 'resolve' | resolve_ambiguity = options.ambiguity == 'resolve' | ||||
| self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity) | |||||
| debug = options.debug if options else False | |||||
| self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug) | |||||
| def match(self, term, token): | def match(self, term, token): | ||||
| return term.name == token.type | return term.name == token.type | ||||
| @@ -152,10 +153,12 @@ class XEarley(_ParserFrontend): | |||||
| self._prepare_match(lexer_conf) | self._prepare_match(lexer_conf) | ||||
| resolve_ambiguity = options.ambiguity == 'resolve' | resolve_ambiguity = options.ambiguity == 'resolve' | ||||
| debug = options.debug if options else False | |||||
| self.parser = xearley.Parser(parser_conf, | self.parser = xearley.Parser(parser_conf, | ||||
| self.match, | self.match, | ||||
| ignore=lexer_conf.ignore, | ignore=lexer_conf.ignore, | ||||
| resolve_ambiguity=resolve_ambiguity, | resolve_ambiguity=resolve_ambiguity, | ||||
| debug=debug, | |||||
| **kw | **kw | ||||
| ) | ) | ||||
| @@ -20,10 +20,11 @@ from .earley_common import Item, TransitiveItem | |||||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor | from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor | ||||
| class Parser: | class Parser: | ||||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True): | |||||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): | |||||
| analysis = GrammarAnalyzer(parser_conf) | analysis = GrammarAnalyzer(parser_conf) | ||||
| self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
| self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
| self.debug = debug | |||||
| self.FIRST = analysis.FIRST | self.FIRST = analysis.FIRST | ||||
| self.NULLABLE = analysis.NULLABLE | self.NULLABLE = analysis.NULLABLE | ||||
| @@ -296,6 +297,10 @@ class Parser: | |||||
| # symbol should have been completed in the last step of the Earley cycle, and will be in | # symbol should have been completed in the last step of the Earley cycle, and will be in | ||||
| # this column. Find the item for the start_symbol, which is the root of the SPPF tree. | # this column. Find the item for the start_symbol, which is the root of the SPPF tree. | ||||
| solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0] | ||||
| if self.debug: | |||||
| from .earley_forest import ForestToPyDotVisitor | |||||
| debug_walker = ForestToPyDotVisitor() | |||||
| debug_walker.visit(solutions[0], "sppf.png") | |||||
| if not solutions: | if not solutions: | ||||
| expected_tokens = [t.expect for t in to_scan] | expected_tokens = [t.expect for t in to_scan] | ||||
| @@ -122,7 +122,7 @@ class PackedNode(ForestNode): | |||||
| ambiguously. Hence, we use the sort order to identify | ambiguously. Hence, we use the sort order to identify | ||||
| the order in which ambiguous children should be considered. | the order in which ambiguous children should be considered. | ||||
| """ | """ | ||||
| return self.is_empty, -self.priority, -self.rule.order | |||||
| return self.is_empty, -self.priority, self.rule.order | |||||
| def __iter__(self): | def __iter__(self): | ||||
| return iter([self.left, self.right]) | return iter([self.left, self.right]) | ||||
| @@ -24,8 +24,8 @@ from .earley_forest import SymbolNode | |||||
| class Parser(BaseParser): | class Parser(BaseParser): | ||||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False): | |||||
| BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity) | |||||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False): | |||||
| BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug) | |||||
| self.ignore = [Terminal(t) for t in ignore] | self.ignore = [Terminal(t) for t in ignore] | ||||
| self.complete_lex = complete_lex | self.complete_lex = complete_lex | ||||
| @@ -0,0 +1,39 @@ | |||||
| import codecs | |||||
| import sys | |||||
| import json | |||||
| from lark import Lark | |||||
| from lark.grammar import RuleOptions, Rule | |||||
| from lark.lexer import TerminalDef | |||||
| import argparse | |||||
| argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize') #description='''Lark Serialization Tool -- Stores Lark's internal state & LALR analysis as a convenient JSON file''') | |||||
| argparser.add_argument('grammar_file', type=argparse.FileType('r'), help='A valid .lark file') | |||||
| argparser.add_argument('-o', '--out', type=argparse.FileType('w'), default=sys.stdout, help='json file path to create (default=stdout)') | |||||
| argparser.add_argument('-s', '--start', default='start', help='start symbol (default="start")', nargs='+') | |||||
| argparser.add_argument('-l', '--lexer', default='standard', choices=['standard', 'contextual'], help='lexer type (default="standard")') | |||||
| def serialize(infile, outfile, lexer, start): | |||||
| lark_inst = Lark(infile, parser="lalr", lexer=lexer, start=start) # TODO contextual | |||||
| data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) | |||||
| outfile.write('{\n') | |||||
| outfile.write(' "data": %s,\n' % json.dumps(data)) | |||||
| outfile.write(' "memo": %s\n' % json.dumps(memo)) | |||||
| outfile.write('}\n') | |||||
| def main(): | |||||
| if len(sys.argv) == 1 or '-h' in sys.argv or '--help' in sys.argv: | |||||
| print("Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file") | |||||
| print("") | |||||
| argparser.print_help() | |||||
| else: | |||||
| args = argparser.parse_args() | |||||
| serialize(args.grammar_file, args.out, args.lexer, args.start) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @@ -56,30 +56,6 @@ class Tree(object): | |||||
| def __hash__(self): | def __hash__(self): | ||||
| return hash((self.data, tuple(self.children))) | return hash((self.data, tuple(self.children))) | ||||
| ###} | |||||
| def expand_kids_by_index(self, *indices): | |||||
| "Expand (inline) children at the given indices" | |||||
| for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||||
| kid = self.children[i] | |||||
| self.children[i:i+1] = kid.children | |||||
| def find_pred(self, pred): | |||||
| "Find all nodes where pred(tree) == True" | |||||
| return filter(pred, self.iter_subtrees()) | |||||
| def find_data(self, data): | |||||
| "Find all nodes where tree.data == data" | |||||
| return self.find_pred(lambda t: t.data == data) | |||||
| def scan_values(self, pred): | |||||
| for c in self.children: | |||||
| if isinstance(c, Tree): | |||||
| for t in c.scan_values(pred): | |||||
| yield t | |||||
| else: | |||||
| if pred(c): | |||||
| yield c | |||||
| def iter_subtrees(self): | def iter_subtrees(self): | ||||
| # TODO: Re-write as a more efficient version | # TODO: Re-write as a more efficient version | ||||
| @@ -102,6 +78,31 @@ class Tree(object): | |||||
| yield x | yield x | ||||
| seen.add(id(x)) | seen.add(id(x)) | ||||
| def find_pred(self, pred): | |||||
| "Find all nodes where pred(tree) == True" | |||||
| return filter(pred, self.iter_subtrees()) | |||||
| def find_data(self, data): | |||||
| "Find all nodes where tree.data == data" | |||||
| return self.find_pred(lambda t: t.data == data) | |||||
| ###} | |||||
| def expand_kids_by_index(self, *indices): | |||||
| "Expand (inline) children at the given indices" | |||||
| for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||||
| kid = self.children[i] | |||||
| self.children[i:i+1] = kid.children | |||||
| def scan_values(self, pred): | |||||
| for c in self.children: | |||||
| if isinstance(c, Tree): | |||||
| for t in c.scan_values(pred): | |||||
| yield t | |||||
| else: | |||||
| if pred(c): | |||||
| yield c | |||||
| def iter_subtrees_topdown(self): | def iter_subtrees_topdown(self): | ||||
| stack = [self] | stack = [self] | ||||
| while stack: | while stack: | ||||
| @@ -160,7 +160,7 @@ def smart_decorator(f, create_decorator): | |||||
| elif isinstance(f, partial): | elif isinstance(f, partial): | ||||
| # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 | # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 | ||||
| return create_decorator(f.__func__, True) | |||||
| return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True)) | |||||
| else: | else: | ||||
| return create_decorator(f.__func__.__call__, True) | return create_decorator(f.__func__.__call__, True) | ||||
| @@ -172,7 +172,7 @@ import sre_parse | |||||
| import sre_constants | import sre_constants | ||||
| def get_regexp_width(regexp): | def get_regexp_width(regexp): | ||||
| try: | try: | ||||
| return sre_parse.parse(regexp).getwidth() | |||||
| return [int(x) for x in sre_parse.parse(regexp).getwidth()] | |||||
| except sre_constants.error: | except sre_constants.error: | ||||
| raise ValueError(regexp) | raise ValueError(regexp) | ||||
| @@ -0,0 +1,10 @@ | |||||
| version: 2 | |||||
| mkdocs: | |||||
| configuration: mkdocs.yml | |||||
| fail_on_warning: false | |||||
| formats: all | |||||
| python: | |||||
| version: 3.5 | |||||
| @@ -21,6 +21,7 @@ from .test_parser import ( | |||||
| TestCykStandard, | TestCykStandard, | ||||
| TestLalrContextual, | TestLalrContextual, | ||||
| TestEarleyDynamic, | TestEarleyDynamic, | ||||
| TestLalrCustom, | |||||
| # TestFullEarleyStandard, | # TestFullEarleyStandard, | ||||
| TestFullEarleyDynamic, | TestFullEarleyDynamic, | ||||
| @@ -22,7 +22,7 @@ from lark.exceptions import GrammarError, ParseError, UnexpectedToken, Unexpecte | |||||
| from lark.tree import Tree | from lark.tree import Tree | ||||
| from lark.visitors import Transformer, Transformer_InPlace, v_args | from lark.visitors import Transformer, Transformer_InPlace, v_args | ||||
| from lark.grammar import Rule | from lark.grammar import Rule | ||||
| from lark.lexer import TerminalDef | |||||
| from lark.lexer import TerminalDef, Lexer, TraditionalLexer | |||||
| __path__ = os.path.dirname(__file__) | __path__ = os.path.dirname(__file__) | ||||
| def _read(n, *args): | def _read(n, *args): | ||||
| @@ -431,12 +431,22 @@ def _make_full_earley_test(LEXER): | |||||
| _TestFullEarley.__name__ = _NAME | _TestFullEarley.__name__ = _NAME | ||||
| globals()[_NAME] = _TestFullEarley | globals()[_NAME] = _TestFullEarley | ||||
| class CustomLexer(Lexer): | |||||
| """ | |||||
| The purpose of this custom lexer is to test the integration, | |||||
| so it uses the TraditionalLexer as its implementation, without custom lexing behaviour. | |||||
| """ | |||||
| def __init__(self, lexer_conf): | |||||
| self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks) | |||||
| def lex(self, *args, **kwargs): | |||||
| return self.lexer.lex(*args, **kwargs) | |||||
| def _make_parser_test(LEXER, PARSER): | def _make_parser_test(LEXER, PARSER): | ||||
| lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER | |||||
| def _Lark(grammar, **kwargs): | def _Lark(grammar, **kwargs): | ||||
| return Lark(grammar, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs) | |||||
| return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs) | |||||
| def _Lark_open(gfilename, **kwargs): | def _Lark_open(gfilename, **kwargs): | ||||
| return Lark.open(gfilename, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs) | |||||
| return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs) | |||||
| class _TestParser(unittest.TestCase): | class _TestParser(unittest.TestCase): | ||||
| def test_basic1(self): | def test_basic1(self): | ||||
| g = _Lark("""start: a+ b a* "b" a* | g = _Lark("""start: a+ b a* "b" a* | ||||
| @@ -1532,7 +1542,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| parser = _Lark(grammar) | parser = _Lark(grammar) | ||||
| @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)") | |||||
| @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)") | |||||
| def test_serialize(self): | def test_serialize(self): | ||||
| grammar = """ | grammar = """ | ||||
| start: _ANY b "C" | start: _ANY b "C" | ||||
| @@ -1558,6 +1568,28 @@ def _make_parser_test(LEXER, PARSER): | |||||
| self.assertEqual(parser.parse('xa', 'a'), Tree('a', [])) | self.assertEqual(parser.parse('xa', 'a'), Tree('a', [])) | ||||
| self.assertEqual(parser.parse('xb', 'b'), Tree('b', [])) | self.assertEqual(parser.parse('xb', 'b'), Tree('b', [])) | ||||
| def test_lexer_detect_newline_tokens(self): | |||||
| # Detect newlines in regular tokens | |||||
| g = _Lark(r"""start: "go" tail* | |||||
| !tail : SA "@" | SB "@" | SC "@" | SD "@" | |||||
| SA : "a" /\n/ | |||||
| SB : /b./s | |||||
| SC : "c" /[^a-z]/ | |||||
| SD : "d" /\s/ | |||||
| """) | |||||
| a,b,c,d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children] | |||||
| self.assertEqual(a.line, 2) | |||||
| self.assertEqual(b.line, 3) | |||||
| self.assertEqual(c.line, 4) | |||||
| self.assertEqual(d.line, 5) | |||||
| # Detect newlines in ignored tokens | |||||
| for re in ['/\\n/', '/[^a-z]/', '/\\s/']: | |||||
| g = _Lark('''!start: "a" "a" | |||||
| %ignore {}'''.format(re)) | |||||
| a, b = g.parse('a\na').children | |||||
| self.assertEqual(a.line, 1) | |||||
| self.assertEqual(b.line, 2) | |||||
| _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() | ||||
| @@ -1572,6 +1604,7 @@ _TO_TEST = [ | |||||
| ('dynamic_complete', 'earley'), | ('dynamic_complete', 'earley'), | ||||
| ('standard', 'lalr'), | ('standard', 'lalr'), | ||||
| ('contextual', 'lalr'), | ('contextual', 'lalr'), | ||||
| ('custom', 'lalr'), | |||||
| # (None, 'earley'), | # (None, 'earley'), | ||||
| ] | ] | ||||
| @@ -4,6 +4,7 @@ import unittest | |||||
| from unittest import TestCase | from unittest import TestCase | ||||
| import copy | import copy | ||||
| import pickle | import pickle | ||||
| import functools | |||||
| from lark.tree import Tree | from lark.tree import Tree | ||||
| from lark.visitors import Transformer, Interpreter, visit_children_decor, v_args, Discard | from lark.visitors import Transformer, Interpreter, visit_children_decor, v_args, Discard | ||||
| @@ -146,6 +147,22 @@ class TestTrees(TestCase): | |||||
| res = T().transform(t) | res = T().transform(t) | ||||
| self.assertEqual(res, 2.9) | self.assertEqual(res, 2.9) | ||||
| def test_partial(self): | |||||
| tree = Tree("start", [Tree("a", ["test1"]), Tree("b", ["test2"])]) | |||||
| def test(prefix, s, postfix): | |||||
| return prefix + s.upper() + postfix | |||||
| @v_args(inline=True) | |||||
| class T(Transformer): | |||||
| a = functools.partial(test, "@", postfix="!") | |||||
| b = functools.partial(lambda s: s + "!") | |||||
| res = T().transform(tree) | |||||
| assert res.children == ["@TEST1!", "test2!"] | |||||
| def test_discard(self): | def test_discard(self): | ||||
| class MyTransformer(Transformer): | class MyTransformer(Transformer): | ||||
| def a(self, args): | def a(self, args): | ||||