Skip to content

Commit

Permalink
Merge branch 'FormalLanguageConstrainedPathQuerying:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
TreshMom authored Mar 4, 2024
2 parents ad2b752 + 989a71c commit bf87ad0
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code_style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
matrix:

# Each option you define in the matrix has a key and value
python-version: [ 3.8 ]
python-version: [ 3.9 ]

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
antlr4-python3-runtime
black
cfpq-data
networkx==3.2.1
pre-commit
pydot
pytest
Expand Down
2 changes: 1 addition & 1 deletion tasks/task2.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Задача 2. Построение детерминированного конечного автомата по регулярному выражению и недетерминированного конечного автомата по графу

* **Жёсткий дедлайн**: 21.02.2024, 23:59
* **Жёсткий дедлайн**: 28.02.2024, 23:59
* Полный балл: 5

## Задача
Expand Down
25 changes: 23 additions & 2 deletions tasks/task3.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
# Задача 3. Регулярные запросы для всех пар вершин

* **Мягкий дедлайн**: 25.09.2023, 23:59
* **Жёсткий дедлайн**: 28.09.2023, 23:59
* **Жёсткий дедлайн**: 06.03.2024, 23:59
* Полный балл: 5

## Задача

- [ ] Реализовать тип (```FiniteAutomaton```), представляющий конечный автомат в виде разреженной матрицы смежности из [SciPy](https://docs.scipy.org/doc/scipy/reference/sparse.html) (или сразу её булевой декомпозиции) и информации о стартовых и финальных вершинах. У типа должны быть конструкторы от ```DeterministicFiniteAutomaton``` и ```NondeterministicFiniteAutomaton``` из [Задачи 2](https://github.com/FormalLanguageConstrainedPathQuerying/formal-lang-course/blob/main/tasks/task2.md).
- [ ] Реализовать функцию-интерпретатор для типа ```FiniteAutomaton```, выясняющую, принимает ли автомат заданную строку и является ли язык, задающийся автоматом, пустым. Для реализации последней функции рекомендуется использовать транзитивное замыкание матрицы смежности.
- Требуемые функции:
```python
def accepts(self, word: Iterable[Symbol]) -> bool:
pass
def is_empty(self) -> bool:
pass
```
- [ ] Используя [разреженные матрицы из SciPy](https://docs.scipy.org/doc/scipy/reference/sparse.html), реализовать **функцию** пересечения двух конечных автоматов через тензорное произведение.
- Требуемая функция:
```python
def intersect_automata(automaton1: FiniteAutomaton,
automaton2: FiniteAutomaton) -> FiniteAutomaton:
pass
```
- [ ] На основе предыдущей функции реализовать **функцию** выполнения регулярных запросов к графам: по графу с заданными стартовыми и финальными вершинами и регулярному выражению вернуть те пары вершин из заданных стартовых и финальных, которые связаны путём, формирующим слово из языка, задаваемого регулярным выражением.
- Требуемая функция:
```python
def paths_ends(graph: MultiDiGraph, start_nodes: set[int],
final_nodes: set[int], regex:str) -> list[tuple[NodeView, NodeView]]:
pass
```

- Для конструирования регулярного запроса и преобразований графа использовать результаты [Задачи 2](https://github.com/FormalLanguageConstrainedPathQuerying/formal-lang-course/blob/main/tasks/task2.md).
- [ ] Добавить необходимые тесты.
125 changes: 76 additions & 49 deletions tests/autotests/test_task2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# You MUST NOT touch anything here except ONE block below
# You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it
# Otherwise, please report it
import copy

import pyformlang.finite_automaton
from networkx import MultiDiGraph
from pyformlang.regular_expression import Regex
Expand Down Expand Up @@ -58,83 +60,88 @@ def test(self, regex_str: str) -> None:
assert dfa.accepts(word)


LABELS = ["a", "b", "c", "x", "y", "z", "alpha", "beta", "gamma", "ɛ"]
LABELS = ["a", "b", "c", "x", "y", "z", "alpha", "beta", "gamma"]
LABEL = "label"
IS_FINAL = "is_final"
IS_START = "is_start"


class GraphWordsHelper:
graph = None
all_paths = None
final_nodes = None
transitive_closure = None
start_nodes = None

def __init__(self, graph: MultiDiGraph):
self.graph = graph
self.all_paths = nx.shortest_path(graph)
self.graph = graph.copy()
self.final_nodes = list(
map(lambda x: x[0], filter(lambda y: y[1], self.graph.nodes(data=IS_FINAL)))
)
self.start_nodes = list(
map(lambda x: x[0], filter(lambda y: y[1], self.graph.nodes(data=IS_START)))
)
self.transitive_closure: nx.MultiDiGraph = nx.transitive_closure(
copy.deepcopy(self.graph), reflexive=False
)

def is_reachable(self, source, target):
if source not in self.all_paths.keys():
return False
return target in self.all_paths[source].keys()
return target in self.transitive_closure[source].keys()

def _exists_any_final_path(self, node):
for final_node in self.final_nodes:
if self.is_reachable(node, final_node):
return True
return False

def _take_a_step(self, node):
for node_to, edge_dict in dict(self.graph[node]).items():
for edge_data in edge_dict.values():
yield {"node_to": node_to, "label": edge_data["label"]}
yield node_to, edge_data[LABEL]

def _is_final_node(self, node):
return self.graph.nodes(data=True)[node]["is_final"]

def generate_words_by_node(self, node, word=None):
if word is None:
word = list()
for trans in self._take_a_step(node):
tmp = word.copy()
label = trans["label"]
if label != "ɛ":
return node in self.final_nodes

def generate_words_by_node(self, node):
queue = [(node, [])]
while len(queue) != 0:
(n, word) = queue.pop(0)
for node_to, label in self._take_a_step(n):
tmp = word.copy()
tmp.append(label)
if self._is_final_node(trans["node_to"]):
yield tmp.copy()
yield from self.generate_words_by_node(trans["node_to"], tmp.copy())

def take_words_by_node(self, node, n):
final_nodes = list(map(lambda x: x[0], self.graph.nodes(data="is_final")))
if any(
map(lambda final_node: self.is_reachable(node, final_node), final_nodes)
):
return itertools.islice(self.generate_words_by_node(node), 0, n)
if self._is_final_node(node_to):
yield tmp.copy()
if self._exists_any_final_path(node_to):
queue.append((node_to, tmp.copy()))

def take_n_words_by_node(self, node, n):
if self._exists_any_final_path(node):
return list(itertools.islice(self.generate_words_by_node(node), 0, n))
return []

def get_all_words_less_then_n(self, n: int) -> list[str]:
start_nodes = list(map(lambda x: x[0], self.graph.nodes(data="is_start")))
def get_words_with_limiter(self, limiter: int) -> list[str]:
result = list()
for start in start_nodes:
result.extend(self.take_words_by_node(start, n))
for start in self.start_nodes:
result.extend(self.take_n_words_by_node(start, limiter))
if start in self.final_nodes:
result.append([])
return result


@pytest.fixture(scope="class", params=range(5))
@pytest.fixture(scope="class", params=range(8))
def graph(request) -> MultiDiGraph:
n_of_nodes = random.randint(1, 20)
graph = nx.scale_free_graph(n_of_nodes)

for _, _, data in graph.edges(data=True):
data["label"] = random.choice(LABELS)

data[LABEL] = random.choice(LABELS)
for _, data in graph.nodes(data=True):
data[IS_FINAL] = False
data[IS_START] = False
return graph


class TestGraphToNfa:
def test_not_specified(self, graph: MultiDiGraph) -> None:
nfa: pyformlang.finite_automaton.NondeterministicFiniteAutomaton = graph_to_nfa(
graph, set(), set()
)
words_helper = GraphWordsHelper(graph)
words = words_helper.get_all_words_less_then_n(random.randint(10, 100))
if len(words) == 0:
assert nfa.is_empty()
else:
word = random.choice(words)
assert nfa.accepts(word)

def test_random(
def test_random_start_and_final(
self,
graph: MultiDiGraph,
) -> None:
Expand All @@ -149,10 +156,30 @@ def test_random(
)
)
nfa: pyformlang.finite_automaton.NondeterministicFiniteAutomaton = graph_to_nfa(
graph, start_nodes, final_nodes
graph.copy(), start_nodes.copy(), final_nodes.copy()
)
for node, data in graph.nodes(data=True):
if node in start_nodes:
data[IS_START] = True
if node in final_nodes:
data[IS_FINAL] = True
words_helper = GraphWordsHelper(graph)
words = words_helper.get_words_with_limiter(random.randint(10, 100))
if len(words) == 0:
assert nfa.is_empty()
else:
word = random.choice(words)
assert nfa.accepts(word)

def test_not_specified_start_and_final(self, graph: MultiDiGraph) -> None:
nfa: pyformlang.finite_automaton.NondeterministicFiniteAutomaton = graph_to_nfa(
graph.copy(), set(), set()
)
for _, data in graph.nodes(data=True):
data[IS_FINAL] = True
data[IS_START] = True
words_helper = GraphWordsHelper(graph)
words = words_helper.get_all_words_less_then_n(random.randint(10, 100))
words = words_helper.get_words_with_limiter(random.randint(10, 100))
if len(words) == 0:
assert nfa.is_empty()
else:
Expand Down
67 changes: 67 additions & 0 deletions tests/autotests/test_task3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# This file contains test cases that you need to pass to get a grade
# You MUST NOT touch anything here except ONE block below
# You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it
# Otherwise, please report it
import pyformlang.finite_automaton
from networkx import MultiDiGraph
from pyformlang.regular_expression import Regex
import pytest
import random
import itertools
import networkx as nx

# Fix import statements in try block to run tests
try:
from project.task3 import intersect_automata, FiniteAutomaton
from project.task2 import regex_to_dfa
except ImportError:
pytestmark = pytest.mark.skip("Task 3 is not ready to test!")

# Pairs of regular-expression strings used to parametrize TestIntersect.test:
# the first element is passed as ``regex_str1`` and the second as
# ``regex_str2``; the test intersects the automata built from the two regexes.
REGEX_TO_TEST = [
    ("a", "b"),
    ("a", "a"),
    ("a*", "a"),
    ("a*", "aa"),
    ("a*", "a*"),
    ("(aa)*", "a*"),
    ("(a|b)*", "a*"),
    ("(a|b)*", "b"),
    ("(a|b)*", "bbb"),
    ("a|b", "a"),
    ("a|b", "a|c"),
    ("(a|b)(c|d)", "(a|c)(b|d)"),
    ("(a|b)*", "(a|c)*"),
    ("a*b*", "(a|b)*"),
    ("(ab)*", "(a|b)*"),
]


class TestIntersect:
    """Compare ``intersect_automata`` with a reference regex/CFG intersection."""

    @pytest.mark.parametrize(
        "regex_str1, regex_str2",
        REGEX_TO_TEST,
        ids=lambda regex_tuple: regex_tuple,
    )
    def test(self, regex_str1: str, regex_str2: str) -> None:
        """Check that the intersected automaton accepts a word of the reference language.

        The automaton under test is built from the DFAs of both regexes. The
        reference language is obtained by converting the first regex to a CFG
        and intersecting that CFG with the second regex. If the reference
        language is finite and empty, the automaton must report emptiness;
        otherwise one randomly picked reference word must be accepted.
        """
        # Automaton under test: intersection of the two regex automata.
        candidate = intersect_automata(
            FiniteAutomaton(regex_to_dfa(regex_str1)),
            FiniteAutomaton(regex_to_dfa(regex_str2)),
        )

        # Reference language computed independently via a CFG intersection.
        left_regex = Regex(regex_str1)
        right_regex = Regex(regex_str2)
        left_cfg = left_regex.to_cfg()
        reference_cfg = left_cfg.intersection(right_regex)
        word_stream = reference_cfg.get_words()

        if not reference_cfg.is_finite():
            # Infinite language: take the word at a random position in the stream.
            position = random.randint(0, 2**9)
            chosen = next(itertools.islice(word_stream, position, None))
        else:
            candidates = list(word_stream)
            if not candidates:
                # No reference words at all, so the automaton must be empty too.
                assert candidate.is_empty()
                return
            chosen = random.choice(candidates)

        # Each element of the chosen word exposes its label through ``.value``.
        word = map(lambda part: part.value, chosen)

        assert candidate.accepts(word)

0 comments on commit bf87ad0

Please sign in to comment.