| @@ -33,7 +33,7 @@ class LarkOptions: | |||||
| regex: bool | regex: bool | ||||
| debug: bool | debug: bool | ||||
| keep_all_tokens: bool | keep_all_tokens: bool | ||||
| propagate_positions: bool | |||||
| propagate_positions: Union[bool, str] | |||||
| maybe_placeholders: bool | maybe_placeholders: bool | ||||
| lexer_callbacks: Dict[str, Callable[[Token], Token]] | lexer_callbacks: Dict[str, Callable[[Token], Token]] | ||||
| cache: Union[bool, str] | cache: Union[bool, str] | ||||
| @@ -77,7 +77,7 @@ class Lark: | |||||
| regex: bool = False, | regex: bool = False, | ||||
| debug: bool = False, | debug: bool = False, | ||||
| keep_all_tokens: bool = False, | keep_all_tokens: bool = False, | ||||
| propagate_positions: bool = False, | |||||
| propagate_positions: Union[bool, str] = False, | |||||
| maybe_placeholders: bool = False, | maybe_placeholders: bool = False, | ||||
| lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None, | ||||
| cache: Union[bool, str] = False, | cache: Union[bool, str] = False, | ||||
| @@ -76,7 +76,7 @@ class TerminalDef: | |||||
| class Token(str): | class Token(str): | ||||
| type: str | type: str | ||||
| pos_in_stream: int | |||||
| start_pos: int | |||||
| value: Any | value: Any | ||||
| line: int | line: int | ||||
| column: int | column: int | ||||
| @@ -84,7 +84,7 @@ class Token(str): | |||||
| end_column: int | end_column: int | ||||
| end_pos: int | end_pos: int | ||||
| def __init__(self, type_: str, value: Any, pos_in_stream: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None: | |||||
| def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None: | |||||
| ... | ... | ||||
| def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token: | def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token: | ||||
| @@ -7,4 +7,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, | |||||
| from .lexer import Token | from .lexer import Token | ||||
| from .lark import Lark | from .lark import Lark | ||||
| __version__ = "0.11.3" | |||||
| __version__ = "0.11.4" | |||||
| @@ -44,8 +44,9 @@ class LarkOptions(Serialize): | |||||
| Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster) | ||||
| propagate_positions | propagate_positions | ||||
| Propagates (line, column, end_line, end_column) attributes into all tree branches. | Propagates (line, column, end_line, end_column) attributes into all tree branches. | ||||
| Accepts ``False``, ``True``, or "ignore_ws", which will trim the whitespace around your trees. | |||||
| maybe_placeholders | maybe_placeholders | ||||
| When True, the ``[]`` operator returns ``None`` when not matched. | |||||
| When ``True``, the ``[]`` operator returns ``None`` when not matched. | |||||
| When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. | When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. | ||||
| (default= ``False``. Recommended to set to ``True``) | (default= ``False``. Recommended to set to ``True``) | ||||
| @@ -145,7 +146,7 @@ class LarkOptions(Serialize): | |||||
| for name, default in self._defaults.items(): | for name, default in self._defaults.items(): | ||||
| if name in o: | if name in o: | ||||
| value = o.pop(name) | value = o.pop(name) | ||||
| if isinstance(default, bool) and name not in ('cache', 'use_bytes'): | |||||
| if isinstance(default, bool) and name not in ('cache', 'use_bytes', 'propagate_positions'): | |||||
| value = bool(value) | value = bool(value) | ||||
| else: | else: | ||||
| value = default | value = default | ||||
| @@ -573,7 +574,7 @@ class Lark(Serialize): | |||||
@property
def source(self):
    """Deprecated alias for ``source_path``.

    Emits a ``DeprecationWarning`` and returns ``self.source_path``.
    """
    # stacklevel=2 attributes the warning to the code that accessed
    # ``.source`` rather than to this module, so the reported location (and
    # any per-module warning filter) refers to the caller that needs fixing.
    warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning, stacklevel=2)
    return self.source_path
| @source.setter | @source.setter | ||||
| @@ -582,7 +583,7 @@ class Lark(Serialize): | |||||
@property
def grammar_source(self):
    """Deprecated alias for ``source_grammar``.

    Emits a ``DeprecationWarning`` and returns ``self.source_grammar``.
    """
    # stacklevel=2 attributes the warning to the code that accessed
    # ``.grammar_source`` rather than to this module, so the reported
    # location (and any per-module warning filter) refers to the caller.
    warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning, stacklevel=2)
    return self.source_grammar
| @grammar_source.setter | @grammar_source.setter | ||||
| @@ -6,6 +6,7 @@ from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress | |||||
| from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | ||||
| ###{standalone | ###{standalone | ||||
| from warnings import warn | |||||
| from copy import copy | from copy import copy | ||||
| @@ -128,9 +129,9 @@ class Token(Str): | |||||
| end_column will be 5. | end_column will be 5. | ||||
| end_pos: the index where the token ends (basically ``start_pos + len(token)``) | end_pos: the index where the token ends (basically ``start_pos + len(token)``) | ||||
| """ | """ | ||||
| __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | |||||
| __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | |||||
| def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): | |||||
| def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): | |||||
| try: | try: | ||||
| self = super(Token, cls).__new__(cls, value) | self = super(Token, cls).__new__(cls, value) | ||||
| except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
| @@ -138,7 +139,7 @@ class Token(Str): | |||||
| self = super(Token, cls).__new__(cls, value) | self = super(Token, cls).__new__(cls, value) | ||||
| self.type = type_ | self.type = type_ | ||||
| self.pos_in_stream = pos_in_stream | |||||
| self.start_pos = start_pos if start_pos is not None else pos_in_stream | |||||
| self.value = value | self.value = value | ||||
| self.line = line | self.line = line | ||||
| self.column = column | self.column = column | ||||
| @@ -147,6 +148,11 @@ class Token(Str): | |||||
| self.end_pos = end_pos | self.end_pos = end_pos | ||||
| return self | return self | ||||
@property
def pos_in_stream(self):
    """Deprecated alias for ``start_pos``.

    Emits a ``DeprecationWarning`` and returns ``self.start_pos``.
    """
    # stacklevel=2 attributes the warning to the code that accessed
    # ``.pos_in_stream`` rather than to this module, so the reported
    # location (and any per-module warning filter) refers to the caller.
    warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, stacklevel=2)
    return self.start_pos
| def update(self, type_=None, value=None): | def update(self, type_=None, value=None): | ||||
| return Token.new_borrow_pos( | return Token.new_borrow_pos( | ||||
| type_ if type_ is not None else self.type, | type_ if type_ is not None else self.type, | ||||
| @@ -156,16 +162,16 @@ class Token(Str): | |||||
@classmethod
def new_borrow_pos(cls, type_, value, borrow_t):
    """Build a ``cls`` instance from ``type_`` and ``value``, taking every
    position field (start and end) from ``borrow_t``.
    """
    position = (
        borrow_t.start_pos,
        borrow_t.line,
        borrow_t.column,
        borrow_t.end_line,
        borrow_t.end_column,
        borrow_t.end_pos,
    )
    return cls(type_, value, *position)
def __reduce__(self):
    """Pickle support: rebuild the token by calling its class with its fields.

    The end-position fields (``end_line``, ``end_column``, ``end_pos``) are
    included alongside the start position so that they survive a pickle
    round-trip instead of being reset to ``None``.
    """
    return (self.__class__, (self.type, self.value, self.start_pos, self.line, self.column,
                             self.end_line, self.end_column, self.end_pos))
def __repr__(self):
    """Return a short debug form showing only the token's type and value."""
    shown = (self.type, self.value)
    return 'Token(%r, %r)' % shown
def __deepcopy__(self, memo):
    """Return a new ``Token`` with the same type, value and position fields.

    ``memo`` is unused. The end-position fields (``end_line``,
    ``end_column``, ``end_pos``) are copied too, so a deep copy does not
    lose where the token ends.
    """
    return Token(self.type, self.value, self.start_pos, self.line, self.column,
                 self.end_line, self.end_column, self.end_pos)
| def __eq__(self, other): | def __eq__(self, other): | ||||
| if isinstance(other, Token) and self.type != other.type: | if isinstance(other, Token) and self.type != other.type: | ||||
| @@ -1,4 +1,4 @@ | |||||
| from .exceptions import GrammarError | |||||
| from .exceptions import GrammarError, ConfigurationError | |||||
| from .lexer import Token | from .lexer import Token | ||||
| from .tree import Tree | from .tree import Tree | ||||
| from .visitors import InlineTransformer # XXX Deprecated | from .visitors import InlineTransformer # XXX Deprecated | ||||
| @@ -21,6 +21,7 @@ class ExpandSingleChild: | |||||
| return self.node_builder(children) | return self.node_builder(children) | ||||
| class PropagatePositions: | class PropagatePositions: | ||||
| def __init__(self, node_builder): | def __init__(self, node_builder): | ||||
| self.node_builder = node_builder | self.node_builder = node_builder | ||||
| @@ -31,40 +32,52 @@ class PropagatePositions: | |||||
| # local reference to Tree.meta reduces number of presence checks | # local reference to Tree.meta reduces number of presence checks | ||||
| if isinstance(res, Tree): | if isinstance(res, Tree): | ||||
| res_meta = res.meta | res_meta = res.meta | ||||
| for c in children: | |||||
| if isinstance(c, Tree): | |||||
| child_meta = c.meta | |||||
| if not child_meta.empty: | |||||
| res_meta.line = child_meta.line | |||||
| res_meta.column = child_meta.column | |||||
| res_meta.start_pos = child_meta.start_pos | |||||
| res_meta.empty = False | |||||
| break | |||||
| elif isinstance(c, Token): | |||||
| res_meta.line = c.line | |||||
| res_meta.column = c.column | |||||
| res_meta.start_pos = c.pos_in_stream | |||||
| res_meta.empty = False | |||||
| break | |||||
| for c in reversed(children): | |||||
| if isinstance(c, Tree): | |||||
| child_meta = c.meta | |||||
| if not child_meta.empty: | |||||
| res_meta.end_line = child_meta.end_line | |||||
| res_meta.end_column = child_meta.end_column | |||||
| res_meta.end_pos = child_meta.end_pos | |||||
| res_meta.empty = False | |||||
| break | |||||
| elif isinstance(c, Token): | |||||
| res_meta.end_line = c.end_line | |||||
| res_meta.end_column = c.end_column | |||||
| res_meta.end_pos = c.end_pos | |||||
| res_meta.empty = False | |||||
| break | |||||
| src_meta = self._pp_get_meta(children) | |||||
| if src_meta is not None: | |||||
| res_meta.line = src_meta.line | |||||
| res_meta.column = src_meta.column | |||||
| res_meta.start_pos = src_meta.start_pos | |||||
| res_meta.empty = False | |||||
| src_meta = self._pp_get_meta(reversed(children)) | |||||
| if src_meta is not None: | |||||
| res_meta.end_line = src_meta.end_line | |||||
| res_meta.end_column = src_meta.end_column | |||||
| res_meta.end_pos = src_meta.end_pos | |||||
| res_meta.empty = False | |||||
| return res | return res | ||||
def _pp_get_meta(self, children):
    """Return the first item in ``children`` that can supply position info.

    A ``Tree`` qualifies only when its ``meta`` is not empty (its ``meta`` is
    returned); a ``Token`` always qualifies (the token itself is returned).
    Returns ``None`` when nothing qualifies.
    """
    for child in children:
        if isinstance(child, Tree):
            if child.meta.empty:
                continue
            return child.meta
        if isinstance(child, Token):
            return child
    return None
class PropagatePositions_IgnoreWs(PropagatePositions):
    """PropagatePositions variant that never takes positions from empty or
    whitespace-only tokens.
    """

    def _pp_get_meta(self, children):
        """Return the first usable position source in ``children``, or ``None``.

        Trees with empty ``meta`` and tokens that are empty or consist only
        of whitespace are skipped.
        """
        for child in children:
            if isinstance(child, Tree):
                if child.meta.empty:
                    continue
                return child.meta
            if isinstance(child, Token):
                if not child or child.isspace():
                    continue  # Disregard empty and whitespace-only tokens
                return child
        return None
def make_propagate_positions(option):
    """Translate the ``propagate_positions`` option into a wrapper class.

    Returns ``None`` for ``False``, ``PropagatePositions`` for ``True`` and
    ``PropagatePositions_IgnoreWs`` for ``"ignore_ws"``.

    Raises ``ConfigurationError`` for any other value.
    """
    if option is False:
        return None
    if option is True:
        return PropagatePositions
    if option == "ignore_ws":
        return PropagatePositions_IgnoreWs
    raise ConfigurationError('Invalid option for propagate_positions: %r' % option)
| class ChildFilter: | class ChildFilter: | ||||
| def __init__(self, to_include, append_none, node_builder): | def __init__(self, to_include, append_none, node_builder): | ||||
| @@ -320,6 +333,8 @@ class ParseTreeBuilder: | |||||
| self.rule_builders = list(self._init_builders(rules)) | self.rule_builders = list(self._init_builders(rules)) | ||||
| def _init_builders(self, rules): | def _init_builders(self, rules): | ||||
| propagate_positions = make_propagate_positions(self.propagate_positions) | |||||
| for rule in rules: | for rule in rules: | ||||
| options = rule.options | options = rule.options | ||||
| keep_all_tokens = options.keep_all_tokens | keep_all_tokens = options.keep_all_tokens | ||||
| @@ -328,7 +343,7 @@ class ParseTreeBuilder: | |||||
| wrapper_chain = list(filter(None, [ | wrapper_chain = list(filter(None, [ | ||||
| (expand_single_child and not rule.alias) and ExpandSingleChild, | (expand_single_child and not rule.alias) and ExpandSingleChild, | ||||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), | ||||
| self.propagate_positions and PropagatePositions, | |||||
| propagate_positions, | |||||
| self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | ||||
| self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) | self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class) | ||||
| ])) | ])) | ||||