From f30728fcefda6f2fc4523b6c7a3761162e4374e9 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Mon, 30 Mar 2020 12:44:43 +0200 Subject: [PATCH] Add FilterState action. --- crates/generated_parser/src/traits/mod.rs | 1 + crates/parser/src/parser.rs | 3 + crates/parser/src/simulator.rs | 3 + jsparagus/actions.py | 68 ++++++++++++++++++++++- jsparagus/aps.py | 20 ++++++- jsparagus/emit/python.py | 26 +++++---- jsparagus/emit/rust.py | 51 ++++++++++++++--- jsparagus/parse_table.py | 17 +++--- jsparagus/runtime.py | 3 + 9 files changed, 162 insertions(+), 30 deletions(-) diff --git a/crates/generated_parser/src/traits/mod.rs b/crates/generated_parser/src/traits/mod.rs index cc1ede4fd..53b888a8d 100644 --- a/crates/generated_parser/src/traits/mod.rs +++ b/crates/generated_parser/src/traits/mod.rs @@ -30,5 +30,6 @@ pub trait ParserTrait<'alloc, Value> { fn pop(&mut self) -> TermValue; fn replay(&mut self, tv: TermValue); fn epsilon(&mut self, state: usize); + fn top_state(&self) -> usize; fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool>; } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index c0306c2b6..f7e6b338c 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -83,6 +83,9 @@ impl<'alloc> ParserTrait<'alloc, StackValue<'alloc>> for Parser<'alloc> { fn epsilon(&mut self, state: usize) { *self.state_stack.last_mut().unwrap() = state; } + fn top_state(&self) -> usize { + self.state() + } fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool> { let sv = { let stack = self.node_stack.stack_slice(); diff --git a/crates/parser/src/simulator.rs b/crates/parser/src/simulator.rs index 0d8a376de..31061f2ff 100644 --- a/crates/parser/src/simulator.rs +++ b/crates/parser/src/simulator.rs @@ -95,6 +95,9 @@ impl<'alloc, 'parser> ParserTrait<'alloc, ()> for Simulator<'alloc, 'parser> { } *self.sim_state_stack.last_mut().unwrap() = state; } + fn top_state(&self) -> usize { + self.state() + } fn check_not_on_new_line(&mut self, _peek: usize) -> Result<'alloc, bool> { Ok(true) } diff --git a/jsparagus/actions.py b/jsparagus/actions.py index 7f4315cd6..060432e88 100644 --- a/jsparagus/actions.py +++ b/jsparagus/actions.py @@ -4,6 +4,7 @@ import typing import dataclasses +from .ordered import OrderedFrozenSet from .grammar import Element, ErrorSymbol, InitNt, Nt from . import types, grammar @@ -64,6 +65,16 @@ def condition(self) -> Action: "Return the conditional action." raise TypeError("Action.condition not implemented") + def check_same_variable(self, other: Action) -> bool: + "Return whether both conditionals are checking the same variable." + assert self.is_condition() + raise TypeError("Action.check_same_variable not implemented") + + def check_different_values(self, other: Action) -> bool: + "Return whether these 2 conditions are mutually exclusive." + assert self.is_condition() + raise TypeError("Action.check_different_values not implemented") + def follow_edge(self) -> bool: """Whether the execution of this action resume following the epsilon transition (True) or if it breaks the graph epsilon transition (False) and returns @@ -78,7 +89,7 @@ def update_stack_with(self) -> StackDiff: """Returns a StackDiff which represents the mutation to be applied to the parser stack.""" assert self.update_stack() - raise TypeError("Action::update_stack_with not implemented") + raise TypeError("Action.update_stack_with not implemented") def shifted_action(self, shifted_term: Element) -> ShiftedAction: """Transpose this action with shifting the given terminal or Nt. @@ -235,7 +246,8 @@ def __init__(self, terms: typing.FrozenSet[str], accept: bool): def is_inconsistent(self) -> bool: # A lookahead restriction cannot be encoded in code, it has to be - # solved using fix_with_lookahead. + # solved using fix_with_lookahead, which encodes the lookahead + # resolution in the generated parse table. return True def is_condition(self) -> bool: @@ -244,6 +256,12 @@ def is_condition(self) -> bool: def condition(self) -> Lookahead: return self + def check_same_variable(self, other: Action) -> bool: + raise TypeError("Lookahead.check_same_variables: Lookahead are always inconsistent") + + def check_different_values(self, other: Action) -> bool: + raise TypeError("Lookahead.check_different_values: Lookahead are always inconsistent") + def __str__(self) -> str: return "Lookahead({}, {})".format(self.terms, self.accept) @@ -281,6 +299,12 @@ def is_condition(self) -> bool: def condition(self) -> CheckNotOnNewLine: return self + def check_same_variable(self, other: Action) -> bool: + return isinstance(other, CheckNotOnNewLine) and self.offset == other.offset + + def check_different_values(self, other: Action) -> bool: + return False + def shifted_action(self, shifted_term: Element) -> ShiftedAction: if isinstance(shifted_term, Nt): return True @@ -290,6 +314,39 @@ def __str__(self) -> str: return "CheckNotOnNewLine({})".format(self.offset) +class FilterStates(Action): + """Check whether the stack at a given depth match the state value, if so + transition to the destination, otherwise check other states.""" + __slots__ = ['states'] + + states: OrderedFrozenSet[StateId] + + def __init__(self, states: typing.Iterable[StateId]): + super().__init__() + # Set of states which can follow this transition. + self.states = OrderedFrozenSet(sorted(states)) + + def is_condition(self) -> bool: + return True + + def condition(self) -> FilterStates: + return self + + def check_same_variable(self, other: Action) -> bool: + return isinstance(other, FilterStates) + + def check_different_values(self, other: Action) -> bool: + assert isinstance(other, FilterStates) + return self.states.is_disjoint(other.states) + + def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> FilterStates: + states = list(state_map[s] for s in self.states) + return FilterStates(states) + + def __str__(self) -> str: + return "FilterStates({})".format(self.states) + + class FilterFlag(Action): """Define a filter which check for one value of the flag, and continue to the next state if the top of the flag stack matches the expected value.""" @@ -309,6 +366,13 @@ def is_condition(self) -> bool: def condition(self) -> FilterFlag: return self + def check_same_variable(self, other: Action) -> bool: + return isinstance(other, FilterFlag) and self.flag == other.flag + + def check_different_values(self, other: Action) -> bool: + assert isinstance(other, FilterFlag) + return self.value != other.value + def __str__(self) -> str: return "FilterFlag({}, {})".format(self.flag, self.value) diff --git a/jsparagus/aps.py b/jsparagus/aps.py index 2a577bbb1..61496340f 100644 --- a/jsparagus/aps.py +++ b/jsparagus/aps.py @@ -3,9 +3,8 @@ import typing from dataclasses import dataclass -from .grammar import Nt from .lr0 import ShiftedTerm, Term -from .actions import Action +from .actions import Action, FilterStates # Avoid circular reference between this module and parse_table.py if typing.TYPE_CHECKING: @@ -191,6 +190,7 @@ def shift_next(self, pt: ParseTable) -> typing.Iterator[APS]: st, sh, la, rp, hs = self.stack, self.shift, self.lookahead, self.replay, self.history last_edge = sh[-1] state = pt.states[last_edge.src] + state_match_shift_end = True if self.replay == []: for term, to in state.shifted_edges(): edge = Edge(last_edge.src, term) @@ -288,6 +288,22 @@ def shift_next(self, pt: ParseTable) -> typing.Iterator[APS]: new_rp = new_rp + rp new_la = la[:max(len(la) - replay, 0)] yield APS(new_st, new_sh, new_la, new_rp, hs + [edge], True) + elif isinstance(a, FilterStates): + # FilterStates is added by the graph transformation and is + # expected to be added after the replacement of + # Reduce(Unwind(...)) by Unwind, FilterStates and Replay + # actions. Thus, at the time when FilterStates is encountered, + # we do not expect `self.states` to match the last element of + # the `shift` list to match. + assert not state_match_shift_end + + # Emulate FilterStates condition, which is to branch to the + # destination if the state value from the top of the stack is + # in the list of states of this condition. + if self.shift[-1].src in a.states: + # TODO: add the to-state destination common to all actions + # which are following edges. + yield APS(st, sh, la, rp, hs + [edge], self.reducing) else: edge_to = Edge(to, None) yield APS(st, prev_sh + [edge_to], la, rp, hs + [edge], self.reducing) diff --git a/jsparagus/emit/python.py b/jsparagus/emit/python.py index ef48aa498..15a24f4da 100644 --- a/jsparagus/emit/python.py +++ b/jsparagus/emit/python.py @@ -6,7 +6,7 @@ import typing from ..grammar import ErrorSymbol, Nt, Some -from ..actions import (Accept, Action, CheckNotOnNewLine, FilterFlag, FunCall, +from ..actions import (Accept, Action, CheckNotOnNewLine, FilterFlag, FilterStates, FunCall, Lookahead, OutputExpr, PopFlag, PushFlag, Reduce, Seq) from ..runtime import ErrorToken, ErrorTokenClass from ..ordered import OrderedSet @@ -31,6 +31,17 @@ def write_python_parse_table(out: io.TextIOBase, parse_table: ParseTable) -> Non methods: OrderedSet[FunCall] = OrderedSet() + def write_epsilon_transition(indent: str, dest: StateId): + if parse_table.states[dest].epsilon != []: + # This is a transition to an action. + out.write("{}state_{}_actions(parser, lexer)\n".format(indent, dest)) + else: + # This is a transition to a shift. + out.write("{}top = parser.stack.pop()\n".format(indent)) + out.write("{}top = StateTermValue({}, top.term, top.value, top.new_line)\n" + .format(indent, dest)) + out.write("{}parser.stack.append(top)\n".format(indent)) + def write_action(act: Action, indent: str = "") -> typing.Tuple[str, bool]: assert not act.is_inconsistent() if isinstance(act, Reduce): @@ -52,6 +63,9 @@ def write_action(act: Action, indent: str = "") -> typing.Tuple[str, bool]: out.write("{}if not parser.check_not_on_new_line(lexer, {}):\n".format(indent, -act.offset)) out.write("{} return\n".format(indent)) return indent, True + if isinstance(act, FilterStates): + out.write("{}if parser.top_state() in [{}]:\n".format(indent, ", ".join(map(str, act.states)))) + return indent + " ", True if isinstance(act, FilterFlag): out.write("{}if parser.flags[{}][-1] == {}:\n".format(indent, act.flag, act.value)) return indent + " ", True @@ -113,15 +127,7 @@ def map_with_offset(args: typing.Iterable[OutputExpr]) -> typing.Iterator[str]: print(parse_table.debug_context(state.index, "\n", "# ")) raise if fallthrough: - if parse_table.states[dest].epsilon != []: - # This is a transition to an action. - out.write("{}state_{}_actions(parser, lexer)\n".format(indent, dest)) - else: - # This is a transition to a shift. - out.write("{}top = parser.stack.pop()\n".format(indent)) - out.write("{}top = StateTermValue({}, top.term, top.value, top.new_line)\n" - .format(indent, dest)) - out.write("{}parser.stack.append(top)\n".format(indent)) + write_epsilon_transition(indent, dest) out.write("{}return\n".format(indent)) out.write("\n") diff --git a/jsparagus/emit/rust.py b/jsparagus/emit/rust.py index c5a2ad6b8..1c9d7e9fc 100644 --- a/jsparagus/emit/rust.py +++ b/jsparagus/emit/rust.py @@ -4,14 +4,14 @@ import re import unicodedata import sys +from contextlib import contextmanager from ..runtime import (ERROR, ErrorToken, SPECIAL_CASE_TAG) from ..ordered import OrderedSet -from ..grammar import (CallMethod, Some, is_concrete_element, Nt, InitNt, Optional, End, - ErrorSymbol) -from ..actions import (Accept, Action, Reduce, Lookahead, CheckNotOnNewLine, FilterFlag, PushFlag, - PopFlag, FunCall, Seq) +from ..grammar import (Some, Nt, InitNt, End, ErrorSymbol) +from ..actions import (Accept, Action, Reduce, CheckNotOnNewLine, FilterStates, + PushFlag, PopFlag, FunCall, Seq) from .. import types @@ -75,6 +75,20 @@ '...': 'Ellipsis', } + +@contextmanager +def indent(writer): + """This function is meant to be used with the `with` keyword of python, and + allow the user of it to add an indentation level to the code which is + enclosed in the `with` statement. + + This has the advantage that the indentation of the python code is reflected + to the generated code when `with indent(self):` is used. """ + writer.indent += 1 + yield None + writer.indent -= 1 + + class RustActionWriter: """Write epsilon state transitions for a given action function.""" ast_builder = types.Type("AstBuilderDelegate", (types.Lifetime("alloc"),)) @@ -178,11 +192,28 @@ def write_condition(self, state, first_act): assert -act.offset > 0 self.write("// {}", str(act)) self.write("if !parser.check_not_on_new_line({})? {{", -act.offset) - self.indent += 1 - self.write("return Ok(false);") - self.indent -= 1 + with indent(self): + self.write("return Ok(false);") self.write("}") self.write_epsilon_transition(dest) + elif isinstance(first_act, FilterStates): + value = 0 + if len(state.epsilon) == 1: + # This is an attempt to avoid huge unending compilations. + _, dest = next(iter(state.epsilon), (None, None)) + self.write("// parser.top_state() in [{}]", " | ".join(map(str, first_act.states))) + self.write_epsilon_transition(dest) + else: + self.write("match parser.top_state() {") + with indent(self): + for act, dest in state.edges(): + assert first_act.check_same_variable(act) + self.write("{} => {{", " | ".join(map(str, act.states))) + with indent(self): + self.write_epsilon_transition(dest) + self.write("}") + self.write("_ => panic!(\"Unexpected state value.\")") + self.write("}") else: raise ValueError("Unexpected action type") @@ -660,7 +691,8 @@ def actions(self): table_holder_name = self.to_camel_case(mode) table_holder_type = table_holder_name + "<'alloc, Handler>" self.write(0, "struct {} {{", table_holder_type) - self.write(1, "fns: [fn(&mut Handler) -> Result<'alloc, bool>; {}]", self.action_from_shift_count) + self.write(1, "fns: [fn(&mut Handler) -> Result<'alloc, bool>; {}]", + self.action_from_shift_count) self.write(0, "}") self.write(0, "impl<'alloc, Handler> {}", table_holder_type) self.write(0, "where") @@ -681,7 +713,8 @@ def actions(self): self.write(0, "where") self.write(1, "Handler: {}", traits_text) self.write(0, "{") - self.write(1, "{}::<'alloc, Handler>::TABLE.fns[state - {}](parser)", table_holder_name, start_at) + self.write(1, "{}::<'alloc, Handler>::TABLE.fns[state - {}](parser)", + table_holder_name, start_at) self.write(0, "}") self.write(0, "") for state in self.states[self.shift_count:]: diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index 451aefccd..29945b665 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -6,6 +6,7 @@ import os import pickle import typing +import itertools from . import types from .utils import consume, keep_until, split @@ -115,18 +116,20 @@ def is_inconsistent(self) -> bool: elif len(self.epsilon) > 1: if any(k.is_inconsistent() for k, s in self.epsilon): return True - # If all the out-going edges are FilterFlags, with the same flag - # and different values, then this state remains consistent, as this - # can be implemented as a deterministic switch statement. + # NOTE: We can accept multiple conditions as epsilon transitions + # iff they are checking the same variable with non-overlapping + # values. This implies that we can implement these conditions as a + # deterministic switch statement in the code emitter. if any(not k.is_condition() for k, s in self.epsilon): return True - if any(not isinstance(k.condition(), FilterFlag) for k, s in self.epsilon): + iterator = iter(self.epsilon) + first, _ = next(iterator) + if any(not first.check_same_variable(k) for k, s in iterator): return True # "type: ignore" because mypy does not see that the preceding if-statement # means all k.condition() actions are FilterFlags. - if len(set(k.condition().flag for k, s in self.epsilon)) > 1: # type: ignore - return True - if len(self.epsilon) != len(set(k.condition().value for k, s in self.epsilon)): # type: ignore + pairs = itertools.combinations((k for k, s in self.epsilon), 2) + if any(not k1.check_different_values(k2) for k1, k2 in pairs): return True else: try: diff --git a/jsparagus/runtime.py b/jsparagus/runtime.py index 23c86c600..a0e7e42fd 100644 --- a/jsparagus/runtime.py +++ b/jsparagus/runtime.py @@ -209,6 +209,9 @@ def close(self, lexer): assert isinstance(self.stack[1].term, Nt) return self.stack[1].value + def top_state(self): + return self.stack[-1].state + def check_not_on_new_line(self, lexer, peek): if peek <= 0: raise ValueError("check_not_on_new_line got an impossible peek offset")