IThror10 · IThror10 · May 29, 2024 · May 29, 2024 · May 29, 2024 · May 29, 2024
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -9,10 +9,10 @@ jobs:
     steps:
     - name: Set up Git repository
       uses: actions/checkout@v2
-    - name: Set up Python 3.9
+    - name: Set up Python 3.10.12
       uses: actions/setup-python@v2
       with:
-        python-version: "3.9"
+        python-version: "3.10.12"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip

diff --git a/project/task3.py b/project/task3.py
@@ -1,84 +1,154 @@
-from scipy.sparse import dok_matrix, kron
 from pyformlang.finite_automaton import (
     DeterministicFiniteAutomaton as DFA,
-    NondeterministicFiniteAutomaton as NDFA,
+    NondeterministicFiniteAutomaton as NFA,
     State,
+    Symbol,
 )
+from networkx import MultiDiGraph
+from scipy.sparse import dok_matrix, kron
+from typing import Iterable
+from functools import reduce
+
+from project.task2 import regex_to_dfa, graph_to_nfa
 
 
 class FiniteAutomaton:
-    def __init__(self, dfa=None):
-        if not isinstance(dfa, DFA) and not isinstance(dfa, NDFA):
+    def __init__(self, fa=None) -> None:  # fa: a pyformlang DFA/NFA, or None for an empty one
+        self.lbl = True  # selects what labels() returns; overwritten by intersect_automata
+        self.matrices = {}  # symbol -> boolean adjacency matrix over state indices
+        if fa is None:
+            self.start_states, self.final_states = set(), set()
+            self.state_to_index = {}
+            self.index_to_state = {}  # keeps get_state_by_index usable on an empty automaton
             return
 
-        states = dfa.to_dict()
-        self.mapping = {v: i for i, v in enumerate(dfa.states)}
-        self.sparse = dict()
-
-        for label in dfa.symbols:
-            self.sparse[label] = dok_matrix(
-                (len(dfa.states), len(dfa.states)), dtype=bool
-            )
-            for u, edges in states.items():
-                if label in edges:
-                    for v in (
-                        edges[label]
-                        if isinstance(edges[label], set)
-                        else {edges[label]}
-                    ):
-                        self.sparse[label][self.mapping[u], self.mapping[v]] = True
-
-        self.start_states = dfa.start_states
-        self.final_states = dfa.final_states
-
-    def accepts(self, word):
-        return self.to_ndfa().accepts("".join(list(word)))
-
-    def is_empty(self):
-        return len(self.sparse) == 0
-
-    def mapping_for(self, u):
-        return self.mapping[State(u)]
-
-    def to_ndfa(self):
-        ndfa = NDFA()
-        for label in self.sparse.keys():
-            m_size = self.sparse[label].shape[0]
-            for u in range(m_size):
-                for v in range(m_size):
-                    if self.sparse[label][u, v]:
-                        ndfa.add_transition(
-                            self.mapping_for(u), label, self.mapping_for(v)
-                        )
-
-        for s in self.start_states:
-            ndfa.add_start_state(self.mapping_for(s))
-        for s in self.final_states:
-            ndfa.add_final_state(self.mapping_for(s))
-        return ndfa
-
-
-def intersect_automata(fa1: FiniteAutomaton, fa2: FiniteAutomaton):
-    labels = fa1.sparse.keys() & fa2.sparse.keys()
-    fa = FiniteAutomaton()
-    fa.sparse = dict()
-    fa.start_states = set()
-    fa.final_states = set()
-    fa.mapping = dict()
+        self.start_states = fa.start_states
+        self.final_states = fa.final_states
+        # Number the states once and keep the mapping in both directions.
+        self.state_to_index = {state: index for index, state in enumerate(fa.states)}
+        self.index_to_state = {
+            index: state for state, index in self.state_to_index.items()
+        }
+        n_states = len(fa.states)
+        # One matrix per symbol: cell [i, j] is set when state i moves to state j on it.
+        for from_state, transitions in fa.to_dict().items():
+            for symbol, to_states in transitions.items():
+                if symbol not in self.matrices:
+                    self.matrices[symbol] = dok_matrix((n_states, n_states), dtype=bool)
+                if isinstance(fa, DFA):  # to_states is used as ONE state here, not a collection
+                    self.matrices[symbol][
+                        self.state_to_index[from_state], self.state_to_index[to_states]
+                    ] = True
+                else:
+                    for to_state in to_states:
+                        self.matrices[symbol][
+                            self.state_to_index[from_state],
+                            self.state_to_index[to_state],
+                        ] = True
+
+    def to_nfa(self) -> NFA:  # rebuild a pyformlang NFA whose states are the matrix indices
+        nfa = NFA()
+        # Transitions below are named by index, so start/final states are mapped to indices too.
+        for state in self.start_states:
+            nfa.add_start_state(State(self.state_to_index[state]))
+
+        for state in self.final_states:
+            nfa.add_final_state(State(self.state_to_index[state]))
+
+        for label, matrix in self.matrices.items():
+            n, m = matrix.shape
+            for from_state in range(n):
+                for to_state in range(m):
+                    if matrix[from_state, to_state]:
+                        nfa.add_transition(State(from_state), label, State(to_state))
+
+        return nfa
+
+    def set_state_to_index(self, new_state_to_index):  # replaces the mapping and its inverse
+        self.state_to_index = new_state_to_index
+        self.index_to_state = {
+            index: state for state, index in self.state_to_index.items()
+        }
+
+    def set_true(self, label, row, column):  # label must already have a matrix (KeyError otherwise)
+        self.matrices[label][row, column] = True
+
+    def add_label_if_not_exist(self, label, dim=None):
+        if label not in self.matrices:
+            dim = dim or len(self)  # default size: current number of states
+            self.matrices[label] = dok_matrix((dim, dim), dtype=bool)
+
+    def accepts(self, word: Iterable[Symbol]) -> bool:  # delegates to the rebuilt NFA
+        return self.to_nfa().accepts(word)
 
+    def is_empty(self) -> bool:  # delegates to the rebuilt NFA
+        return self.to_nfa().is_empty()
+
+    def get_index(self, state) -> int:  # NOTE(review): unknown states silently map to index 0
+        return self.state_to_index.get(state, 0)
+
+    def get_state_by_index(self, index: int):  # raises KeyError for an unknown index
+        return self.index_to_state[index]
+
+    def __len__(self):  # number of states
+        return len(self.state_to_index)
+
+    def labels(self):  # NOTE(review): yields STATE keys while self.lbl is true - confirm intended
+        return self.state_to_index.keys() if self.lbl else self.matrices.keys()
+
+    def get_transitive_closure(self):  # reachability by paths of length >= 1, over all symbols
+        if not self.matrices:
+            return dok_matrix((0, 0), dtype=bool)
+
+        closure = reduce(lambda x, y: x + y, self.matrices.values())  # one-step reachability
+
+        while True:  # keep adding two-leg paths until no new cell appears
+            prev_zero_count = closure.count_nonzero()
+            closure += closure @ closure
+            if prev_zero_count == closure.count_nonzero():
+                return closure
+
+
+def intersect_automata(  # product of two automata via per-symbol Kronecker products
+    auto1: FiniteAutomaton, auto2: FiniteAutomaton, lbl: bool = True
+) -> FiniteAutomaton:
+    auto1.lbl = auto2.lbl = not lbl  # side effect: overwrites the lbl flag on both inputs
+    res = FiniteAutomaton()
+    # The pair (index1, index2) becomes product index len(auto2) * index1 + index2.
+    for state1, index1 in auto1.state_to_index.items():
+        for state2, index2 in auto2.state_to_index.items():
+            index = len(auto2) * index1 + index2
+            res.state_to_index[index] = index  # product states are named by their own index
+
+            if state1 in auto1.start_states and state2 in auto2.start_states:
+                res.start_states.add(State(index))
+
+            if state1 in auto1.final_states and state2 in auto2.final_states:
+                res.final_states.add(State(index))
+    # NOTE(review): lbl=False sets the flag to True, and labels() then yields state keys, not symbols - confirm.
+    labels = auto1.labels() & auto2.labels()
     for label in labels:
-        fa.sparse[label] = kron(fa1.sparse[label], fa2.sparse[label], "csr")
+        res.matrices[label] = kron(auto1.matrices[label], auto2.matrices[label], "csr")
+
+    return res
 
-    for u, i in fa1.mapping.items():
-        for v, j in fa2.mapping.items():
 
-            k = len(fa2.mapping) * i + j
-            fa.mapping[k] = k
+def paths_ends(  # pairs of graph states taken from start->final cells of the product's closure
+    graph: MultiDiGraph, start: set[int], final: set[int], regex: str
+) -> list[tuple[object, object]]:
+    dfa = FiniteAutomaton(regex_to_dfa(regex))  # automaton for the regex
+    nfa = FiniteAutomaton(graph_to_nfa(graph, start, final))  # automaton for the graph
+    intersection = intersect_automata(nfa, dfa, lbl=False)  # also rewrites nfa.lbl and dfa.lbl
 
-            if u in fa1.start_states and v in fa2.start_states:
-                fa.start_states.add(State(k))
+    if intersection.is_empty():
+        return []
 
-            if u in fa1.final_states and v in fa2.final_states:
-                fa.final_states.add(State(k))
+    from_states, to_states = intersection.get_transitive_closure().nonzero()
+    n = len(dfa)  # product index = n * graph_index + dfa_index, so "// n" recovers the graph index
 
-    return fa
+    return [
+        (nfa.get_state_by_index(from_state // n), nfa.get_state_by_index(to_state // n))
+        for from_state, to_state in zip(from_states, to_states)
+        if from_state in intersection.start_states  # product start = start in both automata
+        and to_state in intersection.final_states  # product final = final in both automata
+    ]
diff --git a/project/task4.py b/project/task4.py
@@ -0,0 +1,25 @@
+from project.task3 import FiniteAutomaton, intersect_automata
+
+
+def reachability_with_constraints(
+    fa: FiniteAutomaton, constraints_fa: FiniteAutomaton
+) -> dict[int, set[int]]:
+    # Maps every start state of fa to the set of fa states paired with it by a
+    # start -> final cell in the transitive closure of the product automaton.
+    # Start states with no such pair keep an empty set.
+    product = intersect_automata(fa, constraints_fa, lbl=False)
+    reachable = {origin: set() for origin in fa.start_states}
+
+    if product.is_empty():
+        return reachable
+
+    rows, cols = product.get_transitive_closure().nonzero()
+    width = len(constraints_fa)  # product index = width * fa_index + constraint_index
+
+    for row, col in zip(rows, cols):
+        if row not in product.start_states or col not in product.final_states:
+            continue
+        targets = reachable[fa.get_state_by_index(row // width)]
+        targets.add(fa.get_state_by_index(col // width))
+
+    return reachable
diff --git a/project/task6.py b/project/task6.py
@@ -0,0 +1,57 @@
+from pyformlang.cfg import CFG, Variable, Terminal, Epsilon
+
+from collections import defaultdict
+from typing import Tuple
+
+
+def cfg_to_weak_normal_form(initCfg, start="S") -> CFG:
+    # Removes unit productions and useless symbols, then rewrites what is left
+    # with two private pyformlang helpers: single-terminal extraction followed
+    # by decomposition. The result always uses Variable(start) as start symbol,
+    # whatever the start symbol of initCfg is.
+    reduced = initCfg.eliminate_unit_productions().remove_useless_symbols()
+    single_terminal = reduced._get_productions_with_only_single_terminals()
+    decomposed = reduced._decompose_productions(single_terminal)
+    new_productions = set(decomposed)
+    return CFG(productions=new_productions, start_symbol=Variable(start))
+
+
+def cfpq_with_hellings(cfg, graph, start_nodes=None, final_nodes=None):
+    # Hellings-style CFPQ: returns the (start, end) node pairs joined by a path
+    # derivable from cfg.start_symbol, optionally restricted to start_nodes /
+    # final_nodes (None = no restriction).
+    terminal, epsilon, heads = defaultdict(set), set(), defaultdict(set)
+    for prod in cfg_to_weak_normal_form(cfg).productions:
+        if len(prod.body) == 2:
+            heads[(prod.body[0], prod.body[1])].add(prod.head)  # body pair -> heads
+        elif all(isinstance(symbol, Epsilon) for symbol in prod.body):
+            epsilon.add(prod.head)  # empty body counts too; the HEAD derives the empty path
+        elif len(prod.body) == 1 and isinstance(prod.body[0], Terminal):
+            terminal[prod.head].add(prod.body[0])
+
+    edges = graph.edges.data("label")
+    cur = {(h, a, b) for a, b, label in edges for h in terminal if label in terminal[h]}
+    cur |= {(h, node, node) for h in epsilon for node in graph.nodes}
+    # Worklist closure: each popped triple is joined with every known triple on
+    # BOTH sides, and anything new is queued so it gets joined in turn.
+    pending = list(cur)
+    while pending:
+        n1, v1, u1 = pending.pop()
+        found = set()  # collected separately: cur must not change while iterated
+        for n2, v2, u2 in cur:
+            found.update((k, v2, u1) for k in heads.get((n2, n1), ()) if u2 == v1)
+            found.update((k, v1, u2) for k in heads.get((n1, n2), ()) if u1 == v2)
+        pending.extend(found - cur)
+        cur |= found
+
+    return {
+        (start, end)
+        for (n, start, end) in cur
+        if Variable(n) == cfg.start_symbol
+        and (start_nodes is None or start in start_nodes)
+        and (final_nodes is None or end in final_nodes)
+    }
+
+
+def read_cfgrammar(filePath, start="S"):  # parse the grammar text stored at filePath
+    with open(filePath) as source:  # text mode for reading is open()'s default
+        return CFG.from_text(source.read(), Variable(start))
diff --git a/project/task7.py b/project/task7.py
@@ -0,0 +1,48 @@
+from scipy.sparse import lil_matrix
+from pyformlang.cfg import CFG, Terminal
+import networkx as nx
+from typing import Set, Tuple
+from project.task6 import cfg_to_weak_normal_form
+
+
+def cfpq_with_matrix(cfg, graph, start_nodes=None, final_nodes=None):
+    # Matrix-based CFPQ: returns the (row, column) pairs whose cell is set in the
+    # matrix of the weak normal form's start symbol, optionally restricted to
+    # start_nodes / final_nodes (None = no restriction).
+    # NOTE(review): node ids index the matrices directly - assumes ints in range(n).
+    wnf = cfg_to_weak_normal_form(cfg)
+    n = graph.number_of_nodes()
+    # One matrix per head, built once so productions sharing a head accumulate.
+    heads = {production.head for production in wnf.productions}
+    matrices = {head: lil_matrix((n, n), dtype=bool) for head in heads}
+    for production in wnf.productions:
+        if not production.body:  # empty body = epsilon production: each node reaches itself
+            matrices[production.head].setdiag(True)
+        elif len(production.body) == 1 and isinstance(production.body[0], Terminal):
+            for start, end, label in graph.edges.data("label"):
+                if str(production.body[0]) == str(label):
+                    matrices[production.head][start, end] = True
+
+    changed = True
+    while changed:  # fixpoint: stop after a full pass in which no matrix gained a cell
+        changed = False
+        for production in wnf.productions:
+            if (
+                len(production.body) == 2
+                and production.body[0] in matrices  # variables that head no production
+                and production.body[1] in matrices  # have no matrix and are skipped
+            ):
+                prev = matrices[production.head].nnz
+                matrices[production.head] += (
+                    matrices[production.body[0]] * matrices[production.body[1]]
+                )
+                changed = changed or (prev != matrices[production.head].nnz)
+
+    return {
+        (row, column)
+        for variable, matrix in matrices.items()
+        for row, column in zip(matrix.tocoo().row, matrix.tocoo().col)
+        if variable == wnf.start_symbol
+        and (start_nodes is None or row in start_nodes)
+        and (final_nodes is None or column in final_nodes)
+    }