Skip to content

Commit

Permalink
Add FilterState action.
Browse files Browse the repository at this point in the history
  • Loading branch information
nbp committed Jun 22, 2020
1 parent 02ab137 commit f30728f
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 30 deletions.
1 change: 1 addition & 0 deletions crates/generated_parser/src/traits/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ pub trait ParserTrait<'alloc, Value> {
fn pop(&mut self) -> TermValue<Value>;
fn replay(&mut self, tv: TermValue<Value>);
fn epsilon(&mut self, state: usize);
fn top_state(&self) -> usize;
fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool>;
}
3 changes: 3 additions & 0 deletions crates/parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ impl<'alloc> ParserTrait<'alloc, StackValue<'alloc>> for Parser<'alloc> {
// Apply an epsilon transition: overwrite the entry on top of `state_stack`
// with `state`, without pushing or popping anything.
// Panics if `state_stack` is empty, since `last_mut()` is unwrapped.
fn epsilon(&mut self, state: usize) {
    *self.state_stack.last_mut().unwrap() = state;
}
// `ParserTrait::top_state` for the parser: forwards to `self.state()` and
// returns its value unchanged.
fn top_state(&self) -> usize {
    self.state()
}
fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool> {
let sv = {
let stack = self.node_stack.stack_slice();
Expand Down
3 changes: 3 additions & 0 deletions crates/parser/src/simulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ impl<'alloc, 'parser> ParserTrait<'alloc, ()> for Simulator<'alloc, 'parser> {
}
*self.sim_state_stack.last_mut().unwrap() = state;
}
// `ParserTrait::top_state` for the simulator: forwards to `self.state()` and
// returns its value unchanged.
fn top_state(&self) -> usize {
    self.state()
}
// The simulator never rejects on this check: the `_peek` offset is ignored
// and the result is always `Ok(true)`.
fn check_not_on_new_line(&mut self, _peek: usize) -> Result<'alloc, bool> {
    Ok(true)
}
Expand Down
68 changes: 66 additions & 2 deletions jsparagus/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import typing
import dataclasses

from .ordered import OrderedFrozenSet
from .grammar import Element, ErrorSymbol, InitNt, Nt
from . import types, grammar

Expand Down Expand Up @@ -64,6 +65,16 @@ def condition(self) -> Action:
"Return the conditional action."
raise TypeError("Action.condition not implemented")

def check_same_variable(self, other: Action) -> bool:
    """Return whether both conditionals are checking the same variable.

    This base implementation asserts that `self` is a condition and then
    always raises TypeError; it never returns a value.
    """
    assert self.is_condition()
    raise TypeError("Action.check_same_variable not implemented")

def check_different_values(self, other: Action) -> bool:
    """Return whether these 2 conditions are mutually exclusive.

    This base implementation asserts that `self` is a condition and then
    always raises TypeError; it never returns a value.
    """
    assert self.is_condition()
    raise TypeError("Action.check_different_values not implemented")

def follow_edge(self) -> bool:
"""Whether the execution of this action resume following the epsilon transition
(True) or if it breaks the graph epsilon transition (False) and returns
Expand All @@ -78,7 +89,7 @@ def update_stack_with(self) -> StackDiff:
"""Returns a StackDiff which represents the mutation to be applied to the
parser stack."""
assert self.update_stack()
raise TypeError("Action::update_stack_with not implemented")
raise TypeError("Action.update_stack_with not implemented")

def shifted_action(self, shifted_term: Element) -> ShiftedAction:
"""Transpose this action with shifting the given terminal or Nt.
Expand Down Expand Up @@ -235,7 +246,8 @@ def __init__(self, terms: typing.FrozenSet[str], accept: bool):

def is_inconsistent(self) -> bool:
# A lookahead restriction cannot be encoded in code, it has to be
# solved using fix_with_lookahead.
# solved using fix_with_lookahead, which encodes the lookahead
# resolution in the generated parse table.
return True

def is_condition(self) -> bool:
Expand All @@ -244,6 +256,12 @@ def is_condition(self) -> bool:
def condition(self) -> Lookahead:
return self

def check_same_variable(self, other: Action) -> bool:
    """Always raise TypeError.

    This override never returns a value: it unconditionally raises, with a
    message naming this method and stating that Lookahead actions are always
    inconsistent.

    Raises:
        TypeError: unconditionally.
    """
    # The message names the method as it is actually spelled, so that the
    # error can be traced back to this override.
    raise TypeError("Lookahead.check_same_variable: Lookahead are always inconsistent")

def check_different_values(self, other: Action) -> bool:
    """Always raise TypeError; this override never returns a value."""
    raise TypeError("Lookahead.check_different_values: Lookahead are always inconsistent")

def __str__(self) -> str:
return "Lookahead({}, {})".format(self.terms, self.accept)

Expand Down Expand Up @@ -281,6 +299,12 @@ def is_condition(self) -> bool:
def condition(self) -> CheckNotOnNewLine:
return self

def check_same_variable(self, other: Action) -> bool:
    """Return True when `other` is also a CheckNotOnNewLine with the same offset."""
    return isinstance(other, CheckNotOnNewLine) and self.offset == other.offset

def check_different_values(self, other: Action) -> bool:
    """Always return False: this condition is never reported as mutually
    exclusive with `other`."""
    return False

def shifted_action(self, shifted_term: Element) -> ShiftedAction:
if isinstance(shifted_term, Nt):
return True
Expand All @@ -290,6 +314,39 @@ def __str__(self) -> str:
return "CheckNotOnNewLine({})".format(self.offset)


class FilterStates(Action):
    """Condition which checks a state value taken from the parser stack.

    When the checked state is one of `states`, the transition to the
    destination is taken; otherwise the other states are checked.
    """
    __slots__ = ['states']

    states: OrderedFrozenSet[StateId]

    def __init__(self, states: typing.Iterable[StateId]):
        super().__init__()
        # Set of states which can follow this transition. The states are
        # sorted before being stored.
        ordered_states = sorted(states)
        self.states = OrderedFrozenSet(ordered_states)

    def is_condition(self) -> bool:
        "A FilterStates action is always a condition."
        return True

    def condition(self) -> FilterStates:
        "Return the conditional action, which is this action itself."
        return self

    def check_same_variable(self, other: Action) -> bool:
        "Return whether `other` is a FilterStates condition as well."
        return isinstance(other, FilterStates)

    def check_different_values(self, other: Action) -> bool:
        "Return whether both conditions have no state in common."
        assert isinstance(other, FilterStates)
        return self.states.is_disjoint(other.states)

    def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> FilterStates:
        "Return a new FilterStates with each state replaced by `state_map[state]`."
        return FilterStates([state_map[s] for s in self.states])

    def __str__(self) -> str:
        return f"FilterStates({self.states})"


class FilterFlag(Action):
"""Define a filter which check for one value of the flag, and continue to the
next state if the top of the flag stack matches the expected value."""
Expand All @@ -309,6 +366,13 @@ def is_condition(self) -> bool:
def condition(self) -> FilterFlag:
return self

def check_same_variable(self, other: Action) -> bool:
    """Return True when `other` is also a FilterFlag testing the same flag."""
    return isinstance(other, FilterFlag) and self.flag == other.flag

def check_different_values(self, other: Action) -> bool:
    """Return True when the two filters expect different values.

    `other` must be a FilterFlag (asserted). Only the values are compared
    here; the flags themselves are not.
    """
    assert isinstance(other, FilterFlag)
    return self.value != other.value

def __str__(self) -> str:
return "FilterFlag({}, {})".format(self.flag, self.value)

Expand Down
20 changes: 18 additions & 2 deletions jsparagus/aps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@

import typing
from dataclasses import dataclass
from .grammar import Nt
from .lr0 import ShiftedTerm, Term
from .actions import Action
from .actions import Action, FilterStates

# Avoid circular reference between this module and parse_table.py
if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -191,6 +190,7 @@ def shift_next(self, pt: ParseTable) -> typing.Iterator[APS]:
st, sh, la, rp, hs = self.stack, self.shift, self.lookahead, self.replay, self.history
last_edge = sh[-1]
state = pt.states[last_edge.src]
state_match_shift_end = True
if self.replay == []:
for term, to in state.shifted_edges():
edge = Edge(last_edge.src, term)
Expand Down Expand Up @@ -288,6 +288,22 @@ def shift_next(self, pt: ParseTable) -> typing.Iterator[APS]:
new_rp = new_rp + rp
new_la = la[:max(len(la) - replay, 0)]
yield APS(new_st, new_sh, new_la, new_rp, hs + [edge], True)
elif isinstance(a, FilterStates):
# FilterStates is added by the graph transformation and is
# expected to be added after the replacement of
# Reduce(Unwind(...)) by Unwind, FilterStates and Replay
# actions. Thus, at the time when FilterStates is encountered,
# we do not expect `self.states` to match the last element of
# the `shift` list.
assert not state_match_shift_end

# Emulate FilterStates condition, which is to branch to the
# destination if the state value from the top of the stack is
# in the list of states of this condition.
if self.shift[-1].src in a.states:
# TODO: add the to-state destination common to all actions
# which are following edges.
yield APS(st, sh, la, rp, hs + [edge], self.reducing)
else:
edge_to = Edge(to, None)
yield APS(st, prev_sh + [edge_to], la, rp, hs + [edge], self.reducing)
Expand Down
26 changes: 16 additions & 10 deletions jsparagus/emit/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import typing

from ..grammar import ErrorSymbol, Nt, Some
from ..actions import (Accept, Action, CheckNotOnNewLine, FilterFlag, FunCall,
from ..actions import (Accept, Action, CheckNotOnNewLine, FilterFlag, FilterStates, FunCall,
Lookahead, OutputExpr, PopFlag, PushFlag, Reduce, Seq)
from ..runtime import ErrorToken, ErrorTokenClass
from ..ordered import OrderedSet
Expand All @@ -31,6 +31,17 @@ def write_python_parse_table(out: io.TextIOBase, parse_table: ParseTable) -> Non

methods: OrderedSet[FunCall] = OrderedSet()

def write_epsilon_transition(indent: str, dest: StateId):
    """Write the generated Python code which transitions to state `dest`.

    Every emitted line is prefixed with `indent`. A destination which has
    epsilon edges is entered by calling its `state_<dest>_actions` function;
    any other destination is entered by replacing the top of the parser
    stack with a copy carrying `dest` as its state.
    """
    if parse_table.states[dest].epsilon == []:
        # This is a transition to a shift: pop the top entry, rebuild it
        # with the destination state and push it back.
        shift_lines = (
            "{}top = parser.stack.pop()\n".format(indent),
            "{}top = StateTermValue({}, top.term, top.value, top.new_line)\n"
            .format(indent, dest),
            "{}parser.stack.append(top)\n".format(indent),
        )
        for line in shift_lines:
            out.write(line)
        return
    # This is a transition to an action.
    out.write("{}state_{}_actions(parser, lexer)\n".format(indent, dest))

def write_action(act: Action, indent: str = "") -> typing.Tuple[str, bool]:
assert not act.is_inconsistent()
if isinstance(act, Reduce):
Expand All @@ -52,6 +63,9 @@ def write_action(act: Action, indent: str = "") -> typing.Tuple[str, bool]:
out.write("{}if not parser.check_not_on_new_line(lexer, {}):\n".format(indent, -act.offset))
out.write("{} return\n".format(indent))
return indent, True
if isinstance(act, FilterStates):
out.write("{}if parser.top_state() in [{}]:\n".format(indent, ", ".join(map(str, act.states))))
return indent + " ", True
if isinstance(act, FilterFlag):
out.write("{}if parser.flags[{}][-1] == {}:\n".format(indent, act.flag, act.value))
return indent + " ", True
Expand Down Expand Up @@ -113,15 +127,7 @@ def map_with_offset(args: typing.Iterable[OutputExpr]) -> typing.Iterator[str]:
print(parse_table.debug_context(state.index, "\n", "# "))
raise
if fallthrough:
if parse_table.states[dest].epsilon != []:
# This is a transition to an action.
out.write("{}state_{}_actions(parser, lexer)\n".format(indent, dest))
else:
# This is a transition to a shift.
out.write("{}top = parser.stack.pop()\n".format(indent))
out.write("{}top = StateTermValue({}, top.term, top.value, top.new_line)\n"
.format(indent, dest))
out.write("{}parser.stack.append(top)\n".format(indent))
write_epsilon_transition(indent, dest)
out.write("{}return\n".format(indent))
out.write("\n")

Expand Down
51 changes: 42 additions & 9 deletions jsparagus/emit/rust.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import re
import unicodedata
import sys
from contextlib import contextmanager

from ..runtime import (ERROR, ErrorToken, SPECIAL_CASE_TAG)
from ..ordered import OrderedSet

from ..grammar import (CallMethod, Some, is_concrete_element, Nt, InitNt, Optional, End,
ErrorSymbol)
from ..actions import (Accept, Action, Reduce, Lookahead, CheckNotOnNewLine, FilterFlag, PushFlag,
PopFlag, FunCall, Seq)
from ..grammar import (Some, Nt, InitNt, End, ErrorSymbol)
from ..actions import (Accept, Action, Reduce, CheckNotOnNewLine, FilterStates,
PushFlag, PopFlag, FunCall, Seq)

from .. import types

Expand Down Expand Up @@ -75,6 +75,20 @@
'...': 'Ellipsis',
}


@contextmanager
def indent(writer):
    """Add one indentation level to `writer` for the body of a `with` block.

    Meant to be used as `with indent(self):`, so that the indentation of the
    Python code mirrors the indentation of the code being generated.

    `writer` is any object with a numeric `indent` attribute. The attribute
    is incremented on entry and decremented on exit, including when the body
    of the `with` statement raises, so a failure while emitting code cannot
    leave the writer with a stale indentation level.
    """
    writer.indent += 1
    try:
        yield None
    finally:
        # Always undo the increment, even when the enclosed block raises.
        writer.indent -= 1


class RustActionWriter:
"""Write epsilon state transitions for a given action function."""
ast_builder = types.Type("AstBuilderDelegate", (types.Lifetime("alloc"),))
Expand Down Expand Up @@ -178,11 +192,28 @@ def write_condition(self, state, first_act):
assert -act.offset > 0
self.write("// {}", str(act))
self.write("if !parser.check_not_on_new_line({})? {{", -act.offset)
self.indent += 1
self.write("return Ok(false);")
self.indent -= 1
with indent(self):
self.write("return Ok(false);")
self.write("}")
self.write_epsilon_transition(dest)
elif isinstance(first_act, FilterStates):
value = 0
if len(state.epsilon) == 1:
# This is an attempt to avoid huge unending compilations.
_, dest = next(iter(state.epsilon), (None, None))
self.write("// parser.top_state() in [{}]", " | ".join(map(str, first_act.states)))
self.write_epsilon_transition(dest)
else:
self.write("match parser.top_state() {")
with indent(self):
for act, dest in state.edges():
assert first_act.check_same_variable(act)
self.write("{} => {{", " | ".join(map(str, act.states)))
with indent(self):
self.write_epsilon_transition(dest)
self.write("}")
self.write("_ => panic!(\"Unexpected state value.\")")
self.write("}")
else:
raise ValueError("Unexpected action type")

Expand Down Expand Up @@ -660,7 +691,8 @@ def actions(self):
table_holder_name = self.to_camel_case(mode)
table_holder_type = table_holder_name + "<'alloc, Handler>"
self.write(0, "struct {} {{", table_holder_type)
self.write(1, "fns: [fn(&mut Handler) -> Result<'alloc, bool>; {}]", self.action_from_shift_count)
self.write(1, "fns: [fn(&mut Handler) -> Result<'alloc, bool>; {}]",
self.action_from_shift_count)
self.write(0, "}")
self.write(0, "impl<'alloc, Handler> {}", table_holder_type)
self.write(0, "where")
Expand All @@ -681,7 +713,8 @@ def actions(self):
self.write(0, "where")
self.write(1, "Handler: {}", traits_text)
self.write(0, "{")
self.write(1, "{}::<'alloc, Handler>::TABLE.fns[state - {}](parser)", table_holder_name, start_at)
self.write(1, "{}::<'alloc, Handler>::TABLE.fns[state - {}](parser)",
table_holder_name, start_at)
self.write(0, "}")
self.write(0, "")
for state in self.states[self.shift_count:]:
Expand Down
17 changes: 10 additions & 7 deletions jsparagus/parse_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import pickle
import typing
import itertools

from . import types
from .utils import consume, keep_until, split
Expand Down Expand Up @@ -115,18 +116,20 @@ def is_inconsistent(self) -> bool:
elif len(self.epsilon) > 1:
if any(k.is_inconsistent() for k, s in self.epsilon):
return True
# If all the out-going edges are FilterFlags, with the same flag
# and different values, then this state remains consistent, as this
# can be implemented as a deterministic switch statement.
# NOTE: We can accept multiple conditions as epsilon transitions
# iff they are checking the same variable with non-overlapping
# values. This implies that we can implement these conditions as a
# deterministic switch statement in the code emitter.
if any(not k.is_condition() for k, s in self.epsilon):
return True
if any(not isinstance(k.condition(), FilterFlag) for k, s in self.epsilon):
iterator = iter(self.epsilon)
first, _ = next(iterator)
if any(not first.check_same_variable(k) for k, s in iterator):
return True
# "type: ignore" because mypy does not see that the preceding if-statement
# means all k.condition() actions are FilterFlags.
if len(set(k.condition().flag for k, s in self.epsilon)) > 1: # type: ignore
return True
if len(self.epsilon) != len(set(k.condition().value for k, s in self.epsilon)): # type: ignore
pairs = itertools.combinations((k for k, s in self.epsilon), 2)
if any(not k1.check_different_values(k2) for k1, k2 in pairs):
return True
else:
try:
Expand Down
3 changes: 3 additions & 0 deletions jsparagus/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ def close(self, lexer):
assert isinstance(self.stack[1].term, Nt)
return self.stack[1].value

def top_state(self):
    """Return the `state` attribute of the last entry of `self.stack`."""
    return self.stack[-1].state

def check_not_on_new_line(self, lexer, peek):
if peek <= 0:
raise ValueError("check_not_on_new_line got an impossible peek offset")
Expand Down

0 comments on commit f30728f

Please sign in to comment.