IThror10 · IThror10 · May 29, 2024 · May 29, 2024 · May 29, 2024 · May 29, 2024
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -9,14 +9,16 @@ jobs:
     steps:
     - name: Set up Git repository
       uses: actions/checkout@v2
-    - name: Set up Python 3.9
+    - name: Set up Python 3.10.12
       uses: actions/setup-python@v2
       with:
-        python-version: "3.9"
+        python-version: "3.10.12"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         python -m pip install -r ./requirements.txt
+    # Runs the ANTLR tool on project/language.g4 (Python3 target, with a visitor)
+    # before the test step; project/task11.py imports the generated modules from
+    # project.lang.project.
+    # NOTE(review): assumes the `antlr4` command is installed by requirements.txt — confirm.
+    - name: Generate parser
+      run: antlr4 -Dlanguage=Python3 project/language.g4 -visitor -o project/lang
     - name: Test with pytest
       run: |
         python ./scripts/run_tests.py
diff --git a/project/language.g4 b/project/language.g4
@@ -0,0 +1,21 @@
+grammar language;
+
+// A program is a (possibly empty) sequence of statements.
+prog: stmt*;
+
+// Statements: declare a graph, bind a name to a value, or add/remove
+// vertices and edges of a named graph.
+stmt: bind | add | remove | declare;
+	declare: 'let' VAR 'is' 'graph';
+	bind: 'let' VAR '=' expr;
+	remove: 'remove' ('vertex' | 'edge' | 'vertices') expr 'from' VAR;
+	add: 'add' ('vertex' | 'edge') expr 'to' VAR;
+
+// Expressions: literals, names, edge triples, sets, regular expressions
+// and reachability queries.
+expr: NUM | CHAR | VAR | edge_expr | set_expr | regexp | select;
+	set_expr: '[' expr (',' expr)* ']';
+	edge_expr: '(' expr ',' expr ',' expr ')';
+	regexp: CHAR | VAR | '(' regexp ')' | regexp '|' regexp | regexp '^' range | regexp '.' regexp | regexp '&' regexp;
+		range: '[' NUM '..' NUM? ']';
+	select: v_filter? v_filter? 'return' VAR (',' VAR)? 'where' VAR 'reachable' 'from' VAR 'in' VAR 'by' expr;
+		v_filter: 'for' VAR 'in' expr;
+
+// Lexer rules. CHAR is one lowercase letter between double quotes (\u0022).
+VAR: [a-z] [a-z0-9]*;
+NUM: ([1-9][0-9]*) | '0';
+CHAR: '\u0022' [a-z] '\u0022';
+// Whitespace only separates tokens. Without this rule every blank or newline
+// in a program is reported by the lexer as a token recognition error.
+WS: [ \t\r\n]+ -> skip;
diff --git a/project/task11.py b/project/task11.py
@@ -0,0 +1,41 @@
+from project.lang.project.languageVisitor import languageVisitor
+from project.lang.project.languageLexer import languageLexer
+from project.lang.project.languageParser import languageParser
+
+from antlr4 import *
+from antlr4.InputStream import InputStream
+
+
+class NodeCounter(languageVisitor):
+    def __init__(self):
+        super().__init__()
+        self.counter = 0
+
+    def enterEveryRule(self, _):
+        self.counter += 1
+
+
+class TreeToProgVisitor(languageVisitor):
+    def __init__(self):
+        super().__init__()
+        self.visits = []
+
+    def enterEveryRule(self, rule):
+        self.visits.append(rule.get_text())
+
+
+def nodes_count(tree: ParserRuleContext) -> int:
+    visitor = NodeCounter()
+    tree.accept(visitor)
+    return visitor.counter
+
+
+def tree_to_prog(tree: ParserRuleContext) -> str:
+    visitor = TreeToProgVisitor()
+    tree.accept(visitor)
+    return "".join(visitor.visits)
+
+
+def prog_to_tree(program: str) -> tuple[ParserRuleContext, bool]:
+    parser = languageParser(CommonTokenStream(languageLexer(InputStream(program))))
+    return parser.prog(), parser.getNumberOfSyntaxErrors() == 0
diff --git a/project/task3.py b/project/task3.py
@@ -1,84 +1,154 @@
-from scipy.sparse import dok_matrix, kron
 from pyformlang.finite_automaton import (
     DeterministicFiniteAutomaton as DFA,
-    NondeterministicFiniteAutomaton as NDFA,
+    NondeterministicFiniteAutomaton as NFA,
     State,
+    Symbol,
 )
+from networkx import MultiDiGraph
+from scipy.sparse import dok_matrix, kron
+from typing import Iterable
+from functools import reduce
+
+from project.task2 import regex_to_dfa, graph_to_nfa
 
 
 class FiniteAutomaton:
-    def __init__(self, dfa=None):
-        if not isinstance(dfa, DFA) and not isinstance(dfa, NDFA):
+    def __init__(self, fa=None) -> None:
+        self.lbl = True
+        self.matrices = {}
+        if fa is None:
+            self.start_states = set()
+            self.final_states = set()
+            self.state_to_index = {}
             return
 
-        states = dfa.to_dict()
-        self.mapping = {v: i for i, v in enumerate(dfa.states)}
-        self.sparse = dict()
-
-        for label in dfa.symbols:
-            self.sparse[label] = dok_matrix(
-                (len(dfa.states), len(dfa.states)), dtype=bool
-            )
-            for u, edges in states.items():
-                if label in edges:
-                    for v in (
-                        edges[label]
-                        if isinstance(edges[label], set)
-                        else {edges[label]}
-                    ):
-                        self.sparse[label][self.mapping[u], self.mapping[v]] = True
-
-        self.start_states = dfa.start_states
-        self.final_states = dfa.final_states
-
-    def accepts(self, word):
-        return self.to_ndfa().accepts("".join(list(word)))
-
-    def is_empty(self):
-        return len(self.sparse) == 0
-
-    def mapping_for(self, u):
-        return self.mapping[State(u)]
-
-    def to_ndfa(self):
-        ndfa = NDFA()
-        for label in self.sparse.keys():
-            m_size = self.sparse[label].shape[0]
-            for u in range(m_size):
-                for v in range(m_size):
-                    if self.sparse[label][u, v]:
-                        ndfa.add_transition(
-                            self.mapping_for(u), label, self.mapping_for(v)
-                        )
-
-        for s in self.start_states:
-            ndfa.add_start_state(self.mapping_for(s))
-        for s in self.final_states:
-            ndfa.add_final_state(self.mapping_for(s))
-        return ndfa
-
-
-def intersect_automata(fa1: FiniteAutomaton, fa2: FiniteAutomaton):
-    labels = fa1.sparse.keys() & fa2.sparse.keys()
-    fa = FiniteAutomaton()
-    fa.sparse = dict()
-    fa.start_states = set()
-    fa.final_states = set()
-    fa.mapping = dict()
+        self.start_states = fa.start_states
+        self.final_states = fa.final_states
+
+        self.state_to_index = {state: index for index, state in enumerate(fa.states)}
+        self.index_to_state = {
+            index: state for state, index in self.state_to_index.items()
+        }
+        n_states = len(fa.states)
+
+        for from_state, transitions in fa.to_dict().items():
+            for symbol, to_states in transitions.items():
+                if symbol not in self.matrices.keys():
+                    self.matrices[symbol] = dok_matrix((n_states, n_states), dtype=bool)
+                if isinstance(fa, DFA):
+                    self.matrices[symbol][
+                        self.state_to_index[from_state], self.state_to_index[to_states]
+                    ] = True
+                else:
+                    for to_state in to_states:
+                        self.matrices[symbol][
+                            self.state_to_index[from_state],
+                            self.state_to_index[to_state],
+                        ] = True
+
+    def to_nfa(self) -> NFA:
+        nfa = NFA()
+
+        for state in self.start_states:
+            nfa.add_start_state(state)
+
+        for state in self.final_states:
+            nfa.add_final_state(state)
+
+        for label, matrix in self.matrices.items():
+            n, m = matrix.shape
+            for from_state in range(n):
+                for to_state in range(m):
+                    if matrix[from_state, to_state]:
+                        nfa.add_transition(State(from_state), label, State(to_state))
+
+        return nfa
+
+    def set_state_to_index(self, new_state_to_index):
+        self.state_to_index = new_state_to_index
+        self.index_to_state = {
+            index: state for state, index in self.state_to_index.items()
+        }
+
+    def set_true(self, label, row, column):
+        self.matrices[label][row, column] = True
+
+    def add_label_if_not_exist(self, label, dim=None):
+        if label not in self.matrices:
+            dim = dim or len(self)
+            self.matrices[label] = dok_matrix((dim, dim), dtype=bool)
+
+    def accepts(self, word: Iterable[Symbol]) -> bool:
+        return self.to_nfa().accepts(word)
 
+    def is_empty(self) -> bool:
+        return self.to_nfa().is_empty()
+
+    def get_index(self, state) -> int:
+        return self.state_to_index.get(state, 0)
+
+    def get_state_by_index(self, index: int):
+        return self.index_to_state[index]
+
+    def __len__(self):
+        return len(self.state_to_index)
+
+    def labels(self):
+        return self.state_to_index.keys() if self.lbl else self.matrices.keys()
+
+    def get_transitive_closure(self):
+        if len(self.matrices.values()) == 0:
+            return dok_matrix((0, 0), dtype=bool)
+
+        closure = reduce(lambda x, y: x + y, self.matrices.values())
+
+        while True:
+            prev_zero_count = closure.count_nonzero()
+            closure += closure @ closure
+            if prev_zero_count == closure.count_nonzero():
+                return closure
+
+
+def intersect_automata(
+    auto1: FiniteAutomaton, auto2: FiniteAutomaton, lbl: bool = True
+) -> FiniteAutomaton:
+    auto1.lbl = auto2.lbl = not lbl
+    res = FiniteAutomaton()
+
+    for state1, index1 in auto1.state_to_index.items():
+        for state2, index2 in auto2.state_to_index.items():
+            index = len(auto2) * index1 + index2
+            res.state_to_index[index] = index
+
+            if state1 in auto1.start_states and state2 in auto2.start_states:
+                res.start_states.add(State(index))
+
+            if state1 in auto1.final_states and state2 in auto2.final_states:
+                res.final_states.add(State(index))
+
+    labels = auto1.labels() & auto2.labels()
     for label in labels:
-        fa.sparse[label] = kron(fa1.sparse[label], fa2.sparse[label], "csr")
+        res.matrices[label] = kron(auto1.matrices[label], auto2.matrices[label], "csr")
+
+    return res
 
-    for u, i in fa1.mapping.items():
-        for v, j in fa2.mapping.items():
 
-            k = len(fa2.mapping) * i + j
-            fa.mapping[k] = k
+def paths_ends(
+    graph: MultiDiGraph, start: set[int], final: set[int], regex: str
+) -> list[tuple[object, object]]:
+    """Return end-point pairs of graph paths constrained by ``regex``.
+
+    The graph (with the given start and final nodes) and the regex are each
+    turned into a FiniteAutomaton and intersected. A pair is reported when, in
+    the product, a final product state is reachable from a start product state
+    by at least one transition. The pair holds the graph-side states as
+    returned by ``get_state_by_index``. An empty intersection yields ``[]``.
+    """
+    dfa = FiniteAutomaton(regex_to_dfa(regex))
+    nfa = FiniteAutomaton(graph_to_nfa(graph, start, final))
+    intersection = intersect_automata(nfa, dfa, lbl=False)
 
-            if u in fa1.start_states and v in fa2.start_states:
-                fa.start_states.add(State(k))
+    if intersection.is_empty():
+        return []
 
-            if u in fa1.final_states and v in fa2.final_states:
-                fa.final_states.add(State(k))
+    from_states, to_states = intersection.get_transitive_closure().nonzero()
+    # Product index = len(dfa) * graph_index + regex_index, so integer division
+    # by len(dfa) recovers the graph-side index.
+    n = len(dfa)
 
-    return fa
+    return [
+        (nfa.get_state_by_index(from_state // n), nfa.get_state_by_index(to_state // n))
+        for from_state, to_state in zip(from_states, to_states)
+        if from_state in intersection.start_states
+        and to_state in intersection.final_states
+    ]
diff --git a/project/task4.py b/project/task4.py
@@ -0,0 +1,25 @@
+from project.task3 import FiniteAutomaton, intersect_automata
+
+
+def reachability_with_constraints(
+    fa: FiniteAutomaton, constraints_fa: FiniteAutomaton
+) -> dict[int, set[int]]:
+    intersection = intersect_automata(fa, constraints_fa, lbl=False)
+    res = {state: set() for state in fa.start_states}
+
+    if intersection.is_empty():
+        return res
+
+    from_states, to_states = intersection.get_transitive_closure().nonzero()
+    n = len(constraints_fa)
+
+    for from_state, to_state in zip(from_states, to_states):
+        if (
+            from_state in intersection.start_states
+            and to_state in intersection.final_states
+        ):
+            res[fa.get_state_by_index(from_state // n)].add(
+                fa.get_state_by_index(to_state // n)
+            )
+
+    return res
diff --git a/project/task6.py b/project/task6.py
@@ -0,0 +1,57 @@
+from pyformlang.cfg import CFG, Variable, Terminal, Epsilon
+
+from collections import defaultdict
+from typing import Tuple
+
+
+def cfg_to_weak_normal_form(initCfg, start="S") -> CFG:
+    elimCfg = initCfg.eliminate_unit_productions().remove_useless_symbols()
+    return CFG(
+        productions=set(
+            elimCfg._decompose_productions(
+                elimCfg._get_productions_with_only_single_terminals()
+            )
+        ),
+        start_symbol=Variable(start),
+    )
+
+
+def cfpq_with_hellings(cfg, graph, start_nodes=None, final_nodes=None):
+    terminal, epsilon, mult, temp = defaultdict(set), set(), defaultdict(set), set()
+    for prod in cfg_to_weak_normal_form(cfg).productions:
+        if len(prod.body) == 2:
+            mult[prod.head].add((prod.body[0], prod.body[1]))
+        elif len(prod.body) == 1 and isinstance(prod.body[0], Terminal):
+            terminal[prod.head].add(prod.body[0])
+        elif len(prod.body) == 1 and isinstance(prod.body[0], Epsilon):
+            epsilon.add(prod.body[0])
+
+    cur = {
+        (n, start, end)
+        for (start, end, label) in graph.edges.data("label")
+        for n in terminal
+        if label in terminal[n]
+    }.union({(n, node, node) for n in epsilon for node in graph.nodes})
+
+    copy = cur.copy()
+    while len(copy) != 0:
+        n1, v1, u1 = copy.pop()
+        for n2, v2, u2 in cur:
+            if v1 == u2:
+                for N_k in mult:
+                    if (n2, n1) in mult[N_k] and (N_k, v2, v1) not in r:
+                        copy.add((N_k, v2, u1))
+                        temp.add((N_k, v2, u1))
+
+    return {
+        (start, end)
+        for (n, start, end) in cur.union(temp)
+        if Variable(n) == cfg.start_symbol
+        and (start_nodes is None or start in start_nodes)
+        and (final_nodes is None or end in final_nodes)
+    }
+
+
+def read_cfgrammar(filePath, start="S"):
+    with open(filePath, "r") as file:
+        return CFG.from_text(file.read(), Variable(start))