From 1cc6b78fe8eef1d8b3f1a157ab42ee50644938d0 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 11:25:20 +0300 Subject: [PATCH 1/7] [HW-4.0] Add initial sollution for 4th task --- project/task3.py | 208 +++++++++++++++++++++++++++++++---------------- project/task4.py | 25 ++++++ 2 files changed, 165 insertions(+), 68 deletions(-) create mode 100644 project/task4.py diff --git a/project/task3.py b/project/task3.py index 96f0fd587..501270bcf 100644 --- a/project/task3.py +++ b/project/task3.py @@ -1,84 +1,156 @@ -from scipy.sparse import dok_matrix, kron from pyformlang.finite_automaton import ( DeterministicFiniteAutomaton as DFA, - NondeterministicFiniteAutomaton as NDFA, + NondeterministicFiniteAutomaton as NFA, State, + Symbol, ) +from networkx import MultiDiGraph +from scipy.sparse import dok_matrix, kron +from typing import Iterable +from functools import reduce + +from project.task2 import regex_to_dfa, graph_to_nfa class FiniteAutomaton: - def __init__(self, dfa=None): - if not isinstance(dfa, DFA) and not isinstance(dfa, NDFA): + def __init__(self, fa=None) -> None: + self.lbl = True + self.matrices = {} + if fa is None: + self.start_states = set() + self.final_states = set() + self.state_to_index = {} return - states = dfa.to_dict() - self.mapping = {v: i for i, v in enumerate(dfa.states)} - self.sparse = dict() - - for label in dfa.symbols: - self.sparse[label] = dok_matrix( - (len(dfa.states), len(dfa.states)), dtype=bool - ) - for u, edges in states.items(): - if label in edges: - for v in ( - edges[label] - if isinstance(edges[label], set) - else {edges[label]} - ): - self.sparse[label][self.mapping[u], self.mapping[v]] = True - - self.start_states = dfa.start_states - self.final_states = dfa.final_states - - def accepts(self, word): - return self.to_ndfa().accepts("".join(list(word))) - - def is_empty(self): - return len(self.sparse) == 0 - - def mapping_for(self, u): - return self.mapping[State(u)] - - def to_ndfa(self): - ndfa = NDFA() - for label in self.sparse.keys(): - m_size = self.sparse[label].shape[0] - for u in range(m_size): - for v in range(m_size): - if self.sparse[label][u, v]: - ndfa.add_transition( - self.mapping_for(u), label, self.mapping_for(v) - ) - - for s in self.start_states: - ndfa.add_start_state(self.mapping_for(s)) - for s in self.final_states: - ndfa.add_final_state(self.mapping_for(s)) - return ndfa - - -def intersect_automata(fa1: FiniteAutomaton, fa2: FiniteAutomaton): - labels = fa1.sparse.keys() & fa2.sparse.keys() - fa = FiniteAutomaton() - fa.sparse = dict() - fa.start_states = set() - fa.final_states = set() - fa.mapping = dict() + self.start_states = fa.start_states + self.final_states = fa.final_states + + self.state_to_index = {state: index for index, state in enumerate(fa.states)} + self.index_to_state = { + index: state for state, index in self.state_to_index.items() + } + n_states = len(fa.states) + + for from_state, transitions in fa.to_dict().items(): + for symbol, to_states in transitions.items(): + if symbol not in self.matrices.keys(): + self.matrices[symbol] = dok_matrix((n_states, n_states), dtype=bool) + if isinstance(fa, DFA): + self.matrices[symbol][ + self.state_to_index[from_state], self.state_to_index[to_states] + ] = True + else: + for to_state in to_states: + self.matrices[symbol][ + self.state_to_index[from_state], + self.state_to_index[to_state], + ] = True + + def to_nfa(self) -> NFA: + nfa = NFA() + + for state in self.start_states: + nfa.add_start_state(state) + + for state in self.final_states: + nfa.add_final_state(state) + + for label, matrix in self.matrices.items(): + n, m = matrix.shape + for from_state in range(n): + for to_state in range(m): + if matrix[from_state, to_state]: + nfa.add_transition(State(from_state), label, State(to_state)) + + return nfa + + def set_state_to_index(self, new_state_to_index): + self.state_to_index = new_state_to_index + self.index_to_state = { + index: state for state, index in self.state_to_index.items() + } + + def set_true(self, label, row, column): + self.matrices[label][row, column] = True + + def add_label_if_not_exist(self, label, dim=None): + if label not in self.matrices: + dim = dim or len(self) + self.matrices[label] = dok_matrix((dim, dim), dtype=bool) + + def accepts(self, word: Iterable[Symbol]) -> bool: + return self.to_nfa().accepts(word) + def is_empty(self) -> bool: + return self.to_nfa().is_empty() + + def get_index(self, state) -> int: + return self.state_to_index.get(state, 0) + + def get_state_by_index(self, index: int): + return self.index_to_state[index] + + def __len__(self): + return len(self.state_to_index) + + def labels(self): + return self.state_to_index.keys() if self.lbl else self.matrices.keys() + + def get_transitive_closure(self): + if len(self.matrices.values()) == 0: + return dok_matrix((0, 0), dtype=bool) + + closure = reduce(lambda x, y: x + y, self.matrices.values()) + + while True: + prev_zero_count = closure.count_nonzero() + closure += closure @ closure + if prev_zero_count == closure.count_nonzero(): + return closure + + +def intersect_automata( + auto1: FiniteAutomaton, auto2: FiniteAutomaton, lbl: bool = True +) -> FiniteAutomaton: + auto1.lbl = auto2.lbl = not lbl + res = FiniteAutomaton() + + for state1, index1 in auto1.state_to_index.items(): + for state2, index2 in auto2.state_to_index.items(): + index = len(auto2) * index1 + index2 + res.state_to_index[index] = index + + if state1 in auto1.start_states and state2 in auto2.start_states: + res.start_states.add(State(index)) + + if state1 in auto1.final_states and state2 in auto2.final_states: + res.final_states.add(State(index)) + + labels = auto1.labels() & auto2.labels() for label in labels: - fa.sparse[label] = kron(fa1.sparse[label], fa2.sparse[label], "csr") + res.matrices[label] = kron( + auto1.matrices[label], auto2.matrices[label], "csr" + ) + + return res - for u, i in fa1.mapping.items(): - for v, j in fa2.mapping.items(): - k = len(fa2.mapping) * i + j - fa.mapping[k] = k +def paths_ends( + graph: MultiDiGraph, start: set[int], final: set[int], regex: str +) -> list[tuple[object, object]]: + dfa = FiniteAutomaton(regex_to_dfa(regex)) + nfa = FiniteAutomaton(graph_to_nfa(graph, start, final)) + intersection = intersect_automata(nfa, dfa, lbl=False) - if u in fa1.start_states and v in fa2.start_states: - fa.start_states.add(State(k)) + if intersection.is_empty(): + return [] - if u in fa1.final_states and v in fa2.final_states: - fa.final_states.add(State(k)) + from_states, to_states = intersection.get_transitive_closure().nonzero() + n = len(dfa) - return fa + return [ + (nfa.get_state_by_index(from_state // n), nfa.get_state_by_index(to_state // n)) + for from_state, to_state in zip(from_states, to_states) + if from_state in intersection.start_states + and to_state in intersection.final_states + ] \ No newline at end of file diff --git a/project/task4.py b/project/task4.py new file mode 100644 index 000000000..b765b435d --- /dev/null +++ b/project/task4.py @@ -0,0 +1,25 @@ +from project.task3 import FiniteAutomaton, intersect_automata + + +def reachability_with_constraints( + fa: FiniteAutomaton, constraints_fa: FiniteAutomaton +) -> dict[int, set[int]]: + intersection = intersect_automata(fa, constraints_fa, lbl=False) + res = {state: set() for state in fa.start_states} + + if intersection.is_empty(): + return res + + from_states, to_states = intersection.get_transitive_closure().nonzero() + n = len(constraints_fa) + + for from_state, to_state in zip(from_states, to_states): + if ( + from_state in intersection.start_states + and to_state in intersection.final_states + ): + res[fa.get_state_by_index(from_state // n)].add( + fa.get_state_by_index(to_state // n) + ) + + return res \ No newline at end of file From d1df0ee7a26593e2871dbef6a8aa56eefbfed9ed Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 11:32:43 +0300 Subject: [PATCH 2/7] [HW-4.1] Upgrade python version --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2fca47a6a..65fc57765 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,14 +9,14 @@ jobs: steps: - name: Set up Git repository uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.10.12 uses: actions/setup-python@v2 with: - python-version: "3.9" + python-version: "3.10.12" - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install -r ./requirements.txt - name: Test with pytest run: | - python ./scripts/run_tests.py + python ./scripts/run_tests.py \ No newline at end of file From 7a0905d28bff38f28c37315410a68c21c59d2b14 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 11:36:36 +0300 Subject: [PATCH 3/7] [HA-4.2] Fix code style --- .github/workflows/test.yml | 2 +- project/task3.py | 6 ++---- project/task4.py | 2 +- scripts/precommit.sh | 1 + 4 files changed, 5 insertions(+), 6 deletions(-) create mode 100644 scripts/precommit.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 65fc57765..1c84905ad 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,4 +19,4 @@ jobs: python -m pip install -r ./requirements.txt - name: Test with pytest run: | - python ./scripts/run_tests.py \ No newline at end of file + python ./scripts/run_tests.py diff --git a/project/task3.py b/project/task3.py index 501270bcf..6bc4ba1f0 100644 --- a/project/task3.py +++ b/project/task3.py @@ -128,9 +128,7 @@ def intersect_automata( labels = auto1.labels() & auto2.labels() for label in labels: - res.matrices[label] = kron( - auto1.matrices[label], auto2.matrices[label], "csr" - ) + res.matrices[label] = kron(auto1.matrices[label], auto2.matrices[label], "csr") return res @@ -153,4 +151,4 @@ def paths_ends( for from_state, to_state in zip(from_states, to_states) if from_state in intersection.start_states and to_state in intersection.final_states - ] \ No newline at end of file + ] diff --git a/project/task4.py b/project/task4.py index b765b435d..de536085e 100644 --- a/project/task4.py +++ b/project/task4.py @@ -22,4 +22,4 @@ def reachability_with_constraints( fa.get_state_by_index(to_state // n) ) - return res \ No newline at end of file + return res diff --git a/scripts/precommit.sh b/scripts/precommit.sh new file mode 100644 index 000000000..049ea20ff --- /dev/null +++ b/scripts/precommit.sh @@ -0,0 +1 @@ +pre-commit run --all-files --color always --verbose --show-diff-on-failure From 32927068631d9296e0f0536acc10fa66d09554c2 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 17:07:32 +0300 Subject: [PATCH 4/7] [HA-6.0] Add initial sollution for 6th task --- project/task6.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 project/task6.py diff --git a/project/task6.py b/project/task6.py new file mode 100644 index 000000000..9d1cb9887 --- /dev/null +++ b/project/task6.py @@ -0,0 +1,57 @@ +from pyformlang.cfg import CFG, Variable, Terminal, Epsilon + +from collections import defaultdict +from typing import Tuple + + +def cfg_to_weak_normal_form(initCfg, start="S") -> CFG: + elimCfg = initCfg.eliminate_unit_productions().remove_useless_symbols() + return CFG( + productions=set( + elimCfg._decompose_productions( + elimCfg._get_productions_with_only_single_terminals() + ) + ), + start_symbol=Variable(start), + ) + + +def cfpq_with_hellings(cfg, graph, start_nodes=None, final_nodes=None): + terminal, epsilon, mult, temp = defaultdict(set), set(), defaultdict(set), set() + for prod in cfg_to_weak_normal_form(cfg).productions: + if len(prod.body) == 2: + mult[prod.head].add((prod.body[0], prod.body[1])) + elif len(prod.body) == 1 and isinstance(prod.body[0], Terminal): + terminal[prod.head].add(prod.body[0]) + elif len(prod.body) == 1 and isinstance(prod.body[0], Epsilon): + epsilon.add(prod.body[0]) + + cur = { + (n, start, end) + for (start, end, label) in graph.edges.data("label") + for n in terminal + if label in terminal[n] + }.union({(n, node, node) for n in epsilon for node in graph.nodes}) + + copy = cur.copy() + while len(copy) != 0: + n1, v1, u1 = copy.pop() + for n2, v2, u2 in cur: + if v1 == u2: + for N_k in mult: + if (n2, n1) in mult[N_k] and (N_k, v2, v1) not in r: + copy.add((N_k, v2, u1)) + temp.add((N_k, v2, u1)) + + return { + (start, end) + for (n, start, end) in cur.union(temp) + if Variable(n) == cfg.start_symbol + and (start_nodes is None or start in start_nodes) + and (final_nodes is None or end in final_nodes) + } + + +def read_cfgrammar(filePath, start="S"): + with open(filePath, "r") as file: + return CFG.from_text(file.read(), Variable(start)) From 6e20cae123005e6d3423b36cffc4f839eb35086a Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 18:23:20 +0300 Subject: [PATCH 5/7] [HA-7.0] Add initial sollution for 7th task --- project/task7.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 project/task7.py diff --git a/project/task7.py b/project/task7.py new file mode 100644 index 000000000..ee296fad2 --- /dev/null +++ b/project/task7.py @@ -0,0 +1,48 @@ +from scipy.sparse import lil_matrix +from pyformlang.cfg import CFG, Terminal +import networkx as nx +from typing import Set, Tuple +from project.task6 import cfg_to_weak_normal_form + + +def cfpq_with_matrix(cfg, graph, start_nodes=None, final_nodes=None): + wnf = cfg_to_weak_normal_form(cfg) + mapVarIndex = { + variable: index + for index, variable in enumerate( + {production.head for production in wnf.productions} + ) + } + + matrices = {} + n = graph.number_of_nodes() + for production in wnf.productions: + matrices[production.head] = lil_matrix((n, n), dtype=bool) + if len(production.body) == 1 and isinstance(production.body[0], Terminal): + for start, end, label in graph.edges.data("label"): + if str(production.body[0]) == str(label): + matrices[production.head][start, end] = True + + changed = True + while changed: + changed = False + for production in wnf.productions: + if ( + len(production.body) == 2 + and production.body[0] in mapVarIndex + and production.body[1] in mapVarIndex + ): + prev = matrices[production.head].nnz + matrices[production.head] += ( + matrices[production.body[0]] * matrices[production.body[1]] + ) + changed = changed or (prev != matrices[production.head].nnz) + + return { + (row, column) + for variable, matrix in matrices.items() + for row, column in zip(matrix.tocoo().row, matrix.tocoo().col) + if variable == wnf.start_symbol + and (start_nodes is None or row in start_nodes) + and (final_nodes is None or column in final_nodes) + } From d56a8fc724580aab0ac23d1361eff4c464441782 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 19:58:36 +0300 Subject: [PATCH 6/7] [HA-8.0] Add initial sollution for 8th task --- project/task8.py | 122 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 project/task8.py diff --git a/project/task8.py b/project/task8.py new file mode 100644 index 000000000..2c7e2ceef --- /dev/null +++ b/project/task8.py @@ -0,0 +1,122 @@ +from pyformlang.finite_automaton import State, Symbol +from pyformlang.cfg import Epsilon, CFG +from pyformlang.regular_expression import Regex +from pyformlang.rsa import Box, RecursiveAutomaton +import networkx as nx + +from collections import defaultdict + +from project.task2 import graph_to_nfa +from project.task3 import FiniteAutomaton, intersect_automata + + +def cfg_to_rsm(cfg): + productions = defaultdict(list) + for production in cfg.productions: + regex = ( + " ".join(var.value for var in production.body) + if len(production.body) + else Epsilon().to_text() + ) + productions[Symbol(production.head)].append(regex) + + regexes = { + Box(Regex("|".join(regex_list)).to_epsilon_nfa().to_deterministic(), symbol) + for symbol, regex_list in productions.items() + } + return RecursiveAutomaton(productions.keys(), Symbol("S"), regexes) + + +def ebnf_to_rsm(ebnf): + strip = lambda x: x.strip() + productions = defaultdict(list) + for line in map(strip, ebnf.splitlines()): + parts = [*map(strip, line.split("->"))] + if len(parts) == 2: + head, body = parts + body = body if body != "" else Epsilon().to_text() + productions[Symbol(head.strip())].append(body) + + regexes = { + Box(Regex("|".join(regex_list)).to_epsilon_nfa().to_deterministic(), symbol) + for symbol, regex_list in productions.items() + } + return RecursiveAutomaton(productions.keys(), Symbol("S"), regexes) + + +def cfpq_with_tensor( + rsm: RecursiveAutomaton | CFG, + graph: nx.MultiDiGraph, + final_nodes=None, + start_nodes=None, +): + + if not isinstance(rsm, RecursiveAutomaton): + rsm = cfg_to_rsm(rsm) + + start_nodes = graph.nodes if start_nodes is None else start_nodes + final_nodes = graph.nodes if final_nodes is None else final_nodes + + prev = 0 + result = set() + rsm_matrix = rsm_to_matrix(rsm)[0] + automaton = FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)) + + while True: + closure = [ + *zip( + *intersect_automata(rsm_matrix, automaton) + .get_transitive_closure() + .nonzero() + ) + ] + + cur = len(closure) + if cur == prev: + break + prev = cur + + test = lambda x: x in rsm_matrix.start_states and x in rsm_matrix.final_states + for i, j in closure: + if test(rsm_matrix.get_index(i)): + var = rsm_matrix.indexes_dict()[i].value[0] + automaton.add_label_if_not_exist(var) + automaton.set_true(var, i, j) + result.add((i, j)) + return result + + +def rsm_to_matrix(rsm: RecursiveAutomaton) -> tuple: + states, epsilon_symbols = set(), set() + automaton = FiniteAutomaton() + + for v, p in rsm.boxes.items(): + for s in p.dfa.start_states: + automaton.start_states.add(State((v, s.value))) + for s in p.dfa.final_states: + automaton.final_states.add(State((v, s.value))) + for s in p.dfa.states: + states.add(State((v, s.value))) + + automaton.set_state_to_index( + { + value: index + for index, value in enumerate(sorted(states, key=lambda x: x.value[1])) + } + ) + + for v, p in rsm.boxes.items(): + for src, transition in p.dfa.to_dict().items(): + for label, dst in transition.items(): + label = label.value + if isinstance(dst, Epsilon): + epsilon_symbols.add(label) + + automaton.add_label_if_not_exist(label) + for target in {dst} if not isinstance(dst, set) else dst: + automaton.set_true( + label, + automaton.get_index(State((v, src.value))), + automaton.get_index(State((v, target.value))), + ) + return automaton, epsilon_symbols From a2040948648d45ce1e419b496aa7572db4714391 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 29 May 2024 20:44:31 +0300 Subject: [PATCH 7/7] [HA-9.0] Add initial sollution for 9th task --- project/task9.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 project/task9.py diff --git a/project/task9.py b/project/task9.py new file mode 100644 index 000000000..9700197e1 --- /dev/null +++ b/project/task9.py @@ -0,0 +1,57 @@ +from pyformlang.rsa import RecursiveAutomaton +from pyformlang.cfg import CFG +from pyformlang.finite_automaton import State, Symbol +import networkx as nx + +from copy import deepcopy + +from project.task8 import cfg_to_rsm + + +def cfpq_with_gll( + rsm: CFG | RecursiveAutomaton, + graph: nx.DiGraph, + start_nodes=None, + final_nodes=None, +): + + if not isinstance(rsm, RecursiveAutomaton): + rsm = cfg_to_rsm(rsm) + + start_nodes = graph.nodes if start_nodes is None else start_nodes + final_nodes = graph.nodes if final_nodes is None else final_nodes + + result = set() + label = "S" if rsm.initial_label.value is None else rsm.initial_label.value + + dfa_state = rsm.boxes[label].dfa.start_state.value + dfa = rsm.boxes[label].dfa.to_dict() + dfa.setdefault(State(dfa_state), dict()) + + stack = {(v, label): set() for v in start_nodes} + visited = {(v, (dfa_state, label), (v, label)) for v in start_nodes} + queue = deepcopy(visited) + + def addVisit(node, rsm_context, stack_context): + s = (node, rsm_context, stack_context) + if s not in visited: + visited.add(s) + queue.add(s) + + while len(queue) > 0: + v, (_, _), (stack_node, stack_label) = queue.pop() + stack_state = (stack_node, stack_label) + + if stack_node in start_nodes and stack_label == dfa_state and v in final_nodes: + result.add((stack_node, v)) + + for states in stack.setdefault(stack_state, set()): + addVisit(v, states[0], states[1]) + + for symbol, _ in dfa.items(): + if symbol in rsm.labels: + start_sym_state = rsm.boxes[symbol].dfa.start_state.value + rsm_state_ = (start_sym_state, symbol.value) + stack_state_ = (v, symbol.value) + addVisit(v, rsm_state_, stack_state_) + return result