| @@ -0,0 +1 @@ | |||||
| .python-version | |||||
| @@ -1 +1 @@ | |||||
| include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.g tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* | |||||
| include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lark tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* | |||||
| @@ -79,7 +79,7 @@ By the way, if you're curious what these terminals signify, they are roughly equ | |||||
| Lark will accept this, if you really want to complicate your life :) | Lark will accept this, if you really want to complicate your life :) | ||||
| (You can find the original definitions in [common.g](/lark/grammars/common.g).) | |||||
| (You can find the original definitions in [common.lark](/lark/grammars/common.lark).) | |||||
| Notice that terminals are written in UPPER-CASE, while rules are written in lower-case. | Notice that terminals are written in UPPER-CASE, while rules are written in lower-case. | ||||
| I'll touch more on the differences between rules and terminals later. | I'll touch more on the differences between rules and terminals later. | ||||
| @@ -7,6 +7,7 @@ | |||||
| - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) | - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) | ||||
| - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity | - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity | ||||
| - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. | - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. | ||||
| - [lark\_grammar.py](lark_grammar.py) + [lark.lark](lark.lark) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) | |||||
| ### Advanced | ### Advanced | ||||
| @@ -0,0 +1,49 @@ | |||||
// Grammar for grammar files: a sequence of rule definitions, token
// definitions and %-statements, separated by newlines.
| start: (_item | _NL)* | |||||
| _item: rule | |||||
| | token | |||||
| | statement | |||||
// A definition is a name, an optional priority ("." NUMBER), a colon and
// its expansions, terminated by a newline.
| rule: RULE priority? ":" expansions _NL | |||||
| token: TOKEN priority? ":" expansions _NL | |||||
| priority: "." NUMBER | |||||
// Directives: %ignore takes expansions; %import takes a dotted name and an
// optional "-> TOKEN" alias.
| statement: "%ignore" expansions _NL -> ignore | |||||
| | "%import" import_args ["->" TOKEN] _NL -> import | |||||
| import_args: name ("." name)* | |||||
// Alternatives are separated by _VBAR; each alternative may end with a
// "-> RULE" alias. An expansion is a (possibly empty) sequence of exprs.
| ?expansions: alias (_VBAR alias)* | |||||
| ?alias: expansion ["->" RULE] | |||||
| ?expansion: expr* | |||||
// An atom may be followed by an OP, or by "~" NUMBER with an optional
// ".." NUMBER.
| ?expr: atom [OP | "~" NUMBER [".." NUMBER]] | |||||
| ?atom: "(" expansions ")" | |||||
| | "[" expansions "]" -> maybe | |||||
| | STRING ".." STRING -> literal_range | |||||
| | name | |||||
| | (REGEXP | STRING) -> literal | |||||
| name: RULE | |||||
| | TOKEN | |||||
// The bar may be preceded by a newline, so an alternative can start on the
// following line.
| _VBAR: _NL? "|" | |||||
// OP is "+" or "*" with an optional trailing "?", or a lone "?" that is not
// followed by a lower-case letter.
| OP: /[+*][?]?|[?](?![a-z])/ | |||||
// RULE names are lower-case, optionally prefixed by "!" and then "_" or "?";
// TOKEN names are upper-case, optionally prefixed by "_".
| RULE: /!?[_?]?[a-z][_a-z0-9]*/ | |||||
| TOKEN: /_?[A-Z][_A-Z0-9]*/ | |||||
// STRING is the imported ESCAPED_STRING, optionally followed by "i".
| STRING: _STRING "i"? | |||||
// REGEXP is slash-delimited, must not open with two slashes, and may be
// followed by any of the flag letters i, m, s, l, u, x.
| REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ | |||||
// _NL is one or more line breaks plus any whitespace that follows them.
| _NL: /(\r?\n)+\s*/ | |||||
| %import common.ESCAPED_STRING -> _STRING | |||||
| %import common.INT -> NUMBER | |||||
| %import common.WS_INLINE | |||||
// A comment runs from two slashes to the end of the line; comments and
// inline whitespace are ignored.
| COMMENT: "//" /[^\n]/* | |||||
| %ignore WS_INLINE | |||||
| %ignore COMMENT | |||||
| @@ -0,0 +1,18 @@ | |||||
| from lark import Lark | |||||
# Build an LALR parser from the reference grammar. The path is relative to
# the current working directory. The file is opened in a ``with`` block so
# the handle is closed as soon as the grammar has been loaded.
with open('examples/lark.lark') as grammar_source:
    parser = Lark(grammar_source, parser="lalr")

# Grammar files that the reference grammar is expected to parse.
grammar_files = [
    'examples/python2.lark',
    'examples/python3.lark',
    'examples/lark.lark',
    'lark/grammars/common.lark',
]


def test():
    """Parse every file in ``grammar_files`` with the reference grammar.

    Each file is read and closed before the next one is opened. Any error
    raised while opening or parsing a file propagates to the caller; the
    success message is printed only if all files parse.
    """
    for grammar_file in grammar_files:
        with open(grammar_file) as f:
            # The resulting tree is not needed; parsing without an
            # exception is the check.
            parser.parse(f.read())
    print("All grammars parsed successfully")


if __name__ == '__main__':
    test()
| @@ -22,10 +22,9 @@ class PythonIndenter(Indenter): | |||||
| kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') | kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') | ||||
| python_parser2 = Lark.open('python2.g', parser='lalr', **kwargs) | |||||
| python_parser3 = Lark.open('python3.g',parser='lalr', **kwargs) | |||||
| python_parser2_earley = Lark.open('python2.g', parser='earley', lexer='standard', **kwargs) | |||||
| print(python_parser3) | |||||
| python_parser2 = Lark.open('python2.lark', parser='lalr', **kwargs) | |||||
| python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) | |||||
| python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='standard', **kwargs) | |||||
| def _read(fn, *args): | def _read(fn, *args): | ||||
| @@ -1 +1 @@ | |||||
| python -m lark.tools.standalone json.g > json_parser.py | |||||
| python -m lark.tools.standalone json.lark > json_parser.py | |||||
| @@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER | |||||
| // | // | ||||
| // Strings | // Strings | ||||
| // | // | ||||
| //STRING: /"(\\\"|\\\\|[^"\n])*?"i?/ | |||||
| STRING_INNER: ("\\\""|/[^"]/) | STRING_INNER: ("\\\""|/[^"]/) | ||||
| ESCAPED_STRING: "\"" STRING_INNER* "\"" | ESCAPED_STRING: "\"" STRING_INNER* "\"" | ||||
| @@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer): | |||||
| return p | return p | ||||
| def expansion(self, items): | def expansion(self, items): | ||||
| assert items | |||||
| if len(items) == 1: | if len(items) == 1: | ||||
| return items[0] | return items[0] | ||||
| if len({i.flags for i in items}) > 1: | if len({i.flags for i in items}) > 1: | ||||
| @@ -611,7 +612,7 @@ class GrammarLoader: | |||||
| elif stmt.data == 'import': | elif stmt.data == 'import': | ||||
| dotted_path = stmt.children[0].children | dotted_path = stmt.children[0].children | ||||
| name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] | name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] | ||||
| grammar_path = os.path.join(*dotted_path[:-1]) + '.g' | |||||
| grammar_path = os.path.join(*dotted_path[:-1]) + '.lark' | |||||
| g = import_grammar(grammar_path) | g = import_grammar(grammar_path) | ||||
| token_options = dict(g.token_defs)[dotted_path[-1]] | token_options = dict(g.token_defs)[dotted_path[-1]] | ||||
| assert isinstance(token_options, tuple) and len(token_options)==2 | assert isinstance(token_options, tuple) and len(token_options)==2 | ||||
| @@ -11,7 +11,7 @@ setup( | |||||
| requires = [], | requires = [], | ||||
| install_requires = [], | install_requires = [], | ||||
| package_data = { '': ['*.md', '*.g'] }, | |||||
| package_data = { '': ['*.md', '*.lark'] }, | |||||
| test_suite = 'tests.__main__', | test_suite = 'tests.__main__', | ||||