From e9f4c0304af9ece3171d72d0b1a7ff333e102d7b Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Tue, 27 Apr 2021 17:31:52 -0500 Subject: [PATCH] Docs: tools page + more explanations --- docs/features.md | 4 ++-- docs/how_to_use.md | 24 +++++++++++++++++++++- docs/index.rst | 4 ++-- docs/parsers.md | 24 +++++++++++++++++++++- docs/philosophy.md | 2 ++ docs/{nearley.md => tools.md} | 38 ++++++++++++++++++++++++++++------- lark/tools/nearley.py | 3 +++ lark/tools/serialize.py | 3 +++ lark/tools/standalone.py | 3 +++ 9 files changed, 92 insertions(+), 13 deletions(-) rename docs/{nearley.md => tools.md} (63%) diff --git a/docs/features.md b/docs/features.md index a187957..855045b 100644 --- a/docs/features.md +++ b/docs/features.md @@ -7,7 +7,7 @@ - Implements a parse-aware lexer that provides a better power of expression than traditional LALR implementations (such as ply). - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) - Builds a parse-tree (AST) automagically based on the grammar - - Stand-alone parser generator - create a small independent parser to embed in your project. + - Stand-alone parser generator - create a small independent parser to embed in your project. ([read more](tools.md)) - Flexible error handling by using an interactive parser interface (LALR only) - Automatic line & column tracking (for both tokens and matched rules) - Automatic terminal collision resolution @@ -24,7 +24,7 @@ - Import rules and tokens from other Lark grammars, for code reuse and modularity. 
- Support for external regex module ([see here](classes.html#using-unicode-character-classes-with-regex)) - - Import grammars from Nearley.js ([read more](nearley.md)) + - Import grammars from Nearley.js ([read more](tools.md)) - CYK parser - Visualize your parse trees as dot or png files ([see_example](https://github.com/lark-parser/lark/blob/master/examples/fruitflies.py)) diff --git a/docs/how_to_use.md b/docs/how_to_use.md index 5e962cf..5568334 100644 --- a/docs/how_to_use.md +++ b/docs/how_to_use.md @@ -26,7 +26,7 @@ Read the tutorials to get a better understanding of how everything works. (links Use the [Cheatsheet (PDF)](https://lark-parser.readthedocs.io/en/latest/_static/lark_cheatsheet.pdf) for quick reference. -Use the reference pages for more in-depth explanations. (links in the [main page](/index)] +Use the reference pages for more in-depth explanations. (links in the [main page](/index)) ## Debug @@ -59,3 +59,25 @@ a: "a" ''' p = Lark(collision_grammar, parser='lalr', debug=True) ``` + +## Tools + +### Stand-alone parser + +Lark can generate a stand-alone LALR(1) parser from a grammar. + +The resulting module provides the same interface as Lark, but with a fixed grammar, and reduced functionality. + +Run using: + +```bash +python -m lark.tools.standalone +``` + +For a play-by-play, read the [tutorial](http://blog.erezsh.com/create-a-stand-alone-lalr1-parser-in-python/) + +### Import Nearley.js grammars + +It is possible to import Nearley grammars into Lark. The Javascript code is translated using Js2Py. + +Read the [reference page](tools.md) diff --git a/docs/index.rst b/docs/index.rst index a526d53..39ecd5a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -37,7 +37,7 @@ Welcome to Lark's documentation! 
classes visitors forest - nearley + tools @@ -102,7 +102,7 @@ Resources - :doc:`visitors` - :doc:`forest` - :doc:`classes` - - :doc:`nearley` + - :doc:`tools` - `Cheatsheet (PDF)`_ - Discussion diff --git a/docs/parsers.md b/docs/parsers.md index af7df6f..cf2c066 100644 --- a/docs/parsers.md +++ b/docs/parsers.md @@ -42,9 +42,17 @@ Warning: This lexer can be much slower, especially for open-ended terminals such [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, true-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (For example: Python and Java). +LALR(1) stands for: + +- Left-to-right parsing order + +- Rightmost derivation, bottom-up + +- Lookahead of 1 token + Lark comes with an efficient implementation that outperforms every other parsing library for Python (including PLY) -Lark extends the traditional YACC-based architecture with a *contextual lexer*, which automatically provides feedback from the parser to the lexer, making the LALR(1) algorithm stronger than ever. +Lark extends the traditional YACC-based architecture with a *contextual lexer*, which processes feedback from the parser, making the LALR(1) algorithm stronger than ever. The contextual lexer communicates with the parser, and uses the parser's lookahead prediction to narrow its choice of terminals. So at each point, the lexer only matches the subgroup of terminals that are legal at that parser state, instead of all of the terminals. It’s surprisingly effective at resolving common terminal collisions, and allows one to parse languages that LALR(1) was previously incapable of parsing. @@ -52,6 +60,20 @@ The contextual lexer communicates with the parser, and uses the parser's lookahe This is an improvement to LALR(1) that is unique to Lark. 
+### Grammar constraints in LALR(1) + +Due to having only a lookahead of one token, LALR is limited in its ability to choose between rules, when they both match the input. + +Tips for writing a conforming grammar: + +- Try to avoid writing different rules that can match the same sequence of characters. + +- For the best performance, prefer left-recursion over right-recursion. + +- Consider setting terminal priority only as a last resort. + +For a better understanding of these constraints, it's recommended to learn how an SLR parser works. SLR is very similar to LALR but much simpler. + ## CYK Parser A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). diff --git a/docs/philosophy.md b/docs/philosophy.md index b2ce6e7..ebdbd4f 100644 --- a/docs/philosophy.md +++ b/docs/philosophy.md @@ -53,6 +53,8 @@ The Earley algorithm can accept *any* context-free grammar you throw at it (i.e. As the users grow to understand the structure of their grammar, the scope of their target language, and their performance requirements, they may choose to switch over to LALR(1) to gain a huge performance boost, possibly at the cost of some language features. +Both Earley and LALR(1) can use the same grammar, as long as all constraints are satisfied. + In short, "Premature optimization is the root of all evil." ### Other design features diff --git a/docs/nearley.md b/docs/tools.md similarity index 63% rename from docs/nearley.md rename to docs/tools.md index 4ab8595..3ea5176 100644 --- a/docs/nearley.md +++ b/docs/tools.md @@ -1,26 +1,49 @@ -# Importing grammars from Nearley +# Tools (Stand-alone, Nearley) + +## Stand-alone parser + +Lark can generate a stand-alone LALR(1) parser from a grammar. + +The resulting module provides the same interface as Lark, but with a fixed grammar, and reduced functionality. 
+ +Run using: + +```bash +python -m lark.tools.standalone +``` + +For a play-by-play, read the [tutorial](http://blog.erezsh.com/create-a-stand-alone-lalr1-parser-in-python/) + + +## Importing grammars from Nearley.js Lark comes with a tool to convert grammars from [Nearley](https://github.com/Hardmath123/nearley), a popular Earley library for Javascript. It uses [Js2Py](https://github.com/PiotrDabkowski/Js2Py) to convert and run the Javascript postprocessing code segments. -## Requirements +#### Requirements 1. Install Lark with the `nearley` component: ```bash pip install lark-parser[nearley] ``` -2. Acquire a copy of the nearley codebase. This can be done using: +2. Acquire a copy of the Nearley codebase. This can be done using: ```bash git clone https://github.com/Hardmath123/nearley ``` -## Usage +#### Usage + +The tool can be run using: + +```bash +python -m lark.tools.nearley +``` Here's an example of how to import nearley's calculator example into Lark: ```bash git clone https://github.com/Hardmath123/nearley -python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main nearley > ncalc.py +python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main ./nearley > ncalc.py ``` You can use the output as a regular python module: @@ -38,10 +61,11 @@ git clone https://github.com/Hardmath123/nearley python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main nearley --es6 > ncalc.py ``` -## Notes +#### Notes - Lark currently cannot import templates from Nearley - Lark currently cannot export grammars to Nearley -These might get added in the future, if enough users ask for them. \ No newline at end of file +These might get added in the future, if enough users ask for them. 
+ diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index faf2b69..f0779dc 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -194,5 +194,8 @@ def get_arg_parser(): if __name__ == '__main__': parser = get_arg_parser() + if len(sys.argv)==1: + parser.print_help(sys.stderr) + sys.exit(1) args = parser.parse_args() print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6)) diff --git a/lark/tools/serialize.py b/lark/tools/serialize.py index 10884eb..6154024 100644 --- a/lark/tools/serialize.py +++ b/lark/tools/serialize.py @@ -23,6 +23,9 @@ def serialize(lark_inst, outfile): def main(): + if len(sys.argv)==1: + argparser.print_help(sys.stderr) + sys.exit(1) ns = argparser.parse_args() serialize(*build_lalr(ns)) diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 038e181..c86d7d7 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -181,6 +181,9 @@ def main(): parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') parser.add_argument("old_start", nargs='?', help=SUPPRESS) parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") + if len(sys.argv)==1: + parser.print_help(sys.stderr) + sys.exit(1) ns = parser.parse_args() if ns.old_start is not None: warn('The syntax `python -m lark.tools.standalone ` is deprecated. Use the -s option')