diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cdb93cd --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.python-version diff --git a/MANIFEST.in b/MANIFEST.in index 5ee4903..019e37a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.g tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* +include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lark tests/*.py tests/test_nearley/*.py tests/test_nearley/grammars/* diff --git a/docs/json_tutorial.md b/docs/json_tutorial.md index 9f3fbf1..96e76fb 100644 --- a/docs/json_tutorial.md +++ b/docs/json_tutorial.md @@ -79,7 +79,7 @@ By the way, if you're curious what these terminals signify, they are roughly equ Lark will accept this, if you really want to complicate your life :) -(You can find the original definitions in [common.g](/lark/grammars/common.g).) +(You can find the original definitions in [common.lark](/lark/grammars/common.lark).) Notice that terminals are written in UPPER-CASE, while rules are written in lower-case. I'll touch more on the differences between rules and terminals later. diff --git a/examples/README.md b/examples/README.md index 37076d5..3ca623a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,6 +7,7 @@ - [indented\_tree.py](indented\_tree.py) - A demonstration of parsing indentation ("whitespace significant" language) - [fruitflies.py](fruitflies.py) - A demonstration of ambiguity - [turtle\_dsl.py](turtle_dsl.py) - Implements a LOGO-like toy language for Python's turtle, with interpreter. +- [lark\_grammar.py](lark_grammar.py) + [lark.lark](lark.lark) - A reference implementation of the Lark grammar (using LALR(1) + standard lexer) ### Advanced diff --git a/examples/lark.lark b/examples/lark.lark new file mode 100644 index 0000000..1fbf592 --- /dev/null +++ b/examples/lark.lark @@ -0,0 +1,49 @@ +start: (_item | _NL)* + +_item: rule + | token + | statement + +rule: RULE priority? ":" expansions _NL +token: TOKEN priority? ":" expansions _NL + +priority: "." NUMBER + +statement: "%ignore" expansions _NL -> ignore + | "%import" import_args ["->" TOKEN] _NL -> import + +import_args: name ("." name)* + +?expansions: alias (_VBAR alias)* + +?alias: expansion ["->" RULE] + +?expansion: expr* + +?expr: atom [OP | "~" NUMBER [".." NUMBER]] + +?atom: "(" expansions ")" + | "[" expansions "]" -> maybe + | STRING ".." STRING -> literal_range + | name + | (REGEXP | STRING) -> literal + +name: RULE + | TOKEN + +_VBAR: _NL? "|" +OP: /[+*][?]?|[?](?![a-z])/ +RULE: /!?[_?]?[a-z][_a-z0-9]*/ +TOKEN: /_?[A-Z][_A-Z0-9]*/ +STRING: _STRING "i"? +REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ +_NL: /(\r?\n)+\s*/ + +%import common.ESCAPED_STRING -> _STRING +%import common.INT -> NUMBER +%import common.WS_INLINE + +COMMENT: "//" /[^\n]/* + +%ignore WS_INLINE +%ignore COMMENT diff --git a/examples/lark_grammar.py b/examples/lark_grammar.py new file mode 100644 index 0000000..30ccc8b --- /dev/null +++ b/examples/lark_grammar.py @@ -0,0 +1,18 @@ +from lark import Lark + +parser = Lark(open('examples/lark.lark'), parser="lalr") + +grammar_files = [ + 'examples/python2.lark', + 'examples/python3.lark', + 'examples/lark.lark', + 'lark/grammars/common.lark', +] + +def test(): + for grammar_file in grammar_files: + tree = parser.parse(open(grammar_file).read()) + print("All grammars parsed successfully") + +if __name__ == '__main__': + test() diff --git a/examples/python2.g b/examples/python2.lark similarity index 100% rename from examples/python2.g rename to examples/python2.lark diff --git a/examples/python3.g b/examples/python3.lark similarity index 100% rename from examples/python3.g rename to examples/python3.lark diff --git a/examples/python_parser.py b/examples/python_parser.py index 0f9f30b..988fd97 100644 --- a/examples/python_parser.py +++ b/examples/python_parser.py @@ -22,10 +22,9 @@ class PythonIndenter(Indenter): kwargs = dict(rel_to=__file__, postlex=PythonIndenter(), start='file_input') -python_parser2 = Lark.open('python2.g', parser='lalr', **kwargs) -python_parser3 = Lark.open('python3.g',parser='lalr', **kwargs) -python_parser2_earley = Lark.open('python2.g', parser='earley', lexer='standard', **kwargs) -print(python_parser3) +python_parser2 = Lark.open('python2.lark', parser='lalr', **kwargs) +python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) +python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='standard', **kwargs) def _read(fn, *args): diff --git a/examples/standalone/create_standalone.sh b/examples/standalone/create_standalone.sh index 1eba3a4..a4fa879 100755 --- a/examples/standalone/create_standalone.sh +++ b/examples/standalone/create_standalone.sh @@ -1 +1 @@ -python -m lark.tools.standalone json.g > json_parser.py +python -m lark.tools.standalone json.lark > json_parser.py diff --git a/examples/standalone/json.g b/examples/standalone/json.lark similarity index 100% rename from examples/standalone/json.g rename to examples/standalone/json.lark diff --git a/lark/grammars/common.g b/lark/grammars/common.lark similarity index 94% rename from lark/grammars/common.g rename to lark/grammars/common.lark index 2bd02d0..8bc8079 100644 --- a/lark/grammars/common.g +++ b/lark/grammars/common.lark @@ -20,6 +20,7 @@ SIGNED_NUMBER: ["+"|"-"] NUMBER // // Strings // +//STRING: /"(\\\"|\\\\|[^"\n])*?"i?/ STRING_INNER: ("\\\""|/[^"]/) ESCAPED_STRING: "\"" STRING_INNER* "\"" diff --git a/lark/load_grammar.py b/lark/load_grammar.py index f5a0be8..b802fe1 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -375,6 +375,7 @@ class TokenTreeToPattern(Transformer): return p def expansion(self, items): + assert items if len(items) == 1: return items[0] if len({i.flags for i in items}) > 1: @@ -611,7 +612,7 @@ class GrammarLoader: elif stmt.data == 'import': dotted_path = stmt.children[0].children name = stmt.children[1] if len(stmt.children)>1 else dotted_path[-1] - grammar_path = os.path.join(*dotted_path[:-1]) + '.g' + grammar_path = os.path.join(*dotted_path[:-1]) + '.lark' g = import_grammar(grammar_path) token_options = dict(g.token_defs)[dotted_path[-1]] assert isinstance(token_options, tuple) and len(token_options)==2 diff --git a/setup.py b/setup.py index 430ae5c..8543fd4 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( requires = [], install_requires = [], - package_data = { '': ['*.md', '*.g'] }, + package_data = { '': ['*.md', '*.lark'] }, test_suite = 'tests.__main__',