diff --git a/examples/README.md b/examples/README.md
index f40157d..8053ebd 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -27,6 +27,7 @@ For example, the following will parse all the Python files in the standard libra
 
 - [error\_reporting\_lalr.py](error_reporting_lalr.py) - A demonstration of example-driven error reporting with the LALR parser
 - [python\_parser.py](python_parser.py) - A fully-working Python 2 & 3 parser (but not production ready yet!)
+- [python\_bytecode.py](python_bytecode.py) - A toy example showing how to compile Python directly to bytecode
 - [conf\_lalr.py](conf_lalr.py) - Demonstrates the power of LALR's contextual lexer on a toy configuration language
 - [conf\_earley.py](conf_earley.py) - Demonstrates the power of Earley's dynamic lexer on a toy configuration language
 - [custom\_lexer.py](custom_lexer.py) - Demonstrates using a custom lexer to parse a non-textual stream of data
diff --git a/examples/python3.lark b/examples/python3.lark
index 3f39f9f..78c9875 100644
--- a/examples/python3.lark
+++ b/examples/python3.lark
@@ -81,7 +81,7 @@ with_item: test ["as" expr]
 except_clause: "except" [test ["as" NAME]]
 suite: simple_stmt | _NEWLINE _INDENT stmt+ _DEDENT
 
-?test: or_test ["if" or_test "else" test] | lambdef
+?test: or_test ("if" or_test "else" test)? | lambdef
 ?test_nocond: or_test | lambdef_nocond
 lambdef: "lambda" [varargslist] ":" test
 lambdef_nocond: "lambda" [varargslist] ":" test_nocond
@@ -107,7 +107,7 @@ star_expr: "*" expr
 // sake of a __future__ import described in PEP 401 (which really works :-)
 !_comp_op: "<"|">"|"=="|">="|"<="|"<>"|"!="|"in"|"not" "in"|"is"|"is" "not"
 
-?power: await_expr ["**" factor]
+?power: await_expr ("**" factor)?
 ?await_expr: AWAIT? atom_expr
 AWAIT: "await"
 
@@ -137,7 +137,7 @@ dictorsetmaker: ( ((test ":" test | "**" expr) (comp_for | ("," (test ":" test |
 
 classdef: "class" NAME ["(" [arguments] ")"] ":" suite
 
-arguments: argvalue ("," argvalue)* ["," [ starargs | kwargs]]
+arguments: argvalue ("," argvalue)* ("," [ starargs | kwargs])?
          | starargs
          | kwargs
          | test comp_for
@@ -145,7 +145,7 @@ arguments: argvalue ("," argvalue)* ["," [ starargs | kwargs]]
 starargs: "*" test ("," "*" test)* ("," argvalue)* ["," kwargs]
 kwargs: "**" test
 
-?argvalue: test ["=" test]
+?argvalue: test ("=" test)?
 
 
 
diff --git a/examples/python_bytecode.py b/examples/python_bytecode.py
new file mode 100644
index 0000000..cbb8ccd
--- /dev/null
+++ b/examples/python_bytecode.py
@@ -0,0 +1,97 @@
+#
+# This is a toy example that compiles Python directly to bytecode, without generating an AST.
+# It currently only works for very very simple Python code.
+#
+# It requires the 'bytecode' library. You can get it using
+#
+#    $ pip install bytecode
+#
+
+from lark import Lark, Transformer, v_args
+from lark.indenter import Indenter
+
+from bytecode import Instr, Bytecode
+
+class PythonIndenter(Indenter):
+    """Indentation settings for the Python grammar; an instance is passed to Lark as ``postlex``."""
+    NL_type = '_NEWLINE'
+    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
+    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
+    INDENT_type = '_INDENT'
+    DEDENT_type = '_DEDENT'
+    tab_len = 8
+
+
+@v_args(inline=True)
+class Compile(Transformer):
+    """Transformer callbacks that return lists of ``Instr`` rather than tree nodes.
+
+    It is handed to Lark as ``transformer=``; ``file_input`` (the start rule)
+    returns the instruction list for the whole program.
+    """
+
+    def number(self, n):
+        """Emit a LOAD_CONST for the token converted with ``int``."""
+        return [Instr('LOAD_CONST', int(n))]
+
+    def string(self, s):
+        """Emit a LOAD_CONST for the token text without its first and last character."""
+        # Only the two surrounding characters are dropped; escape sequences are not decoded.
+        return [Instr('LOAD_CONST', s[1:-1])]
+
+    def var(self, n):
+        """Emit a LOAD_NAME for the identifier."""
+        return [Instr('LOAD_NAME', n)]
+
+    def arith_expr(self, a, op, b):
+        """Emit both operands followed by BINARY_ADD; only a single ``+`` is handled."""
+        # TODO support chain arithmetic
+        assert op == '+'
+        return a + b + [Instr('BINARY_ADD')]
+
+    def arguments(self, args):
+        """Pass the instructions of a single argument through unchanged."""
+        return args
+
+    def funccall(self, name, args):
+        """Emit the callee, then its argument, then CALL_FUNCTION with a fixed count of 1."""
+        return name + args + [Instr('CALL_FUNCTION', 1)]
+
+    @v_args(inline=False)
+    def file_input(self, stmts):
+        """Concatenate the statements' instruction lists and append RETURN_VALUE."""
+        return sum(stmts, []) + [Instr("RETURN_VALUE")]
+
+    def expr_stmt(self, lval, rval):
+        """Compile ``name = value``: emit the value, then STORE_NAME for the target."""
+        # TODO more complicated than that
+        name ,= lval    # unpacking fails unless the target compiled to exactly one instruction
+        assert name.name == 'LOAD_NAME'     # XXX avoid with another layer of abstraction
+        return rval + [Instr("STORE_NAME", name.arg)]
+
+    def __default__(self, *args):
+        """Fail with the received arguments (presumably reached for rules without a handler above)."""
+        assert False, args
+
+
+python_parser3 = Lark.open('python3.lark', rel_to=__file__, start='file_input',
+                           parser='lalr', postlex=PythonIndenter(),
+                           transformer=Compile(), propagate_positions=False)
+
+def compile_python(s):
+    """Parse source text ``s`` (with a newline appended) and return ``Bytecode(insts).to_code()``."""
+    insts = python_parser3.parse(s+"\n")
+    return Bytecode(insts).to_code()
+
+code = compile_python("""
+a = 3
+b = 5
+print("Hello World!")
+print(a+(b+2))
+print((a+b)+2)
+""")
+exec(code)
+# -- Output --
+# Hello World!
+# 10
+# 10
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 83ec341..77095a8 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -501,7 +501,7 @@ class Grammar:
 
                 empty_indices = [x==_EMPTY for x in expansion]
                 if any(empty_indices):
-                    exp_options = copy(options)
+                    exp_options = copy(options) or RuleOptions()
                     exp_options.empty_indices = empty_indices
                     expansion = [x for x in expansion if x!=_EMPTY]
                 else: