diff --git a/requirements.txt b/requirements.txt index 4ce7ceecd..04c4f5c00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ antlr4-python3-runtime black cfpq-data +grammarinator @ git+https://github.com/renatahodovan/grammarinator.git@f3ffa71 networkx==3.2.1 pre-commit pydot diff --git a/tasks/task11.md b/tasks/task11.md index 531248c16..a3f2d6649 100644 --- a/tasks/task11.md +++ b/tasks/task11.md @@ -9,7 +9,7 @@ prog = stmt* stmt = bind | add | remove | declare -declare = VAR 'is' 'graph' +declare = 'let' VAR 'is' 'graph' bind = 'let' VAR '=' expr @@ -31,8 +31,8 @@ select = v_filter? v_filter? 'return' VAR (',' VAR)? 'where' VAR 'reachable' 'fr v_filter = 'for' VAR 'in' expr -VAR = [a..z]+[a..z 0..1]* -NUM = [1..9]+[0..9]* +VAR = [a..z] [a..z 0..9]* +NUM = 0 | ([1..9] [0..9]*) CHAR = '"' [a..z] '"' ``` @@ -45,15 +45,15 @@ let g is graph add edge (1, "a", 2) to g add edge (2, "a", 3) to g add edge (3, "a", 1) to g -add edge (1, "c", 0) to g -add edge (0, "b", 4) to g -add edge (4, "b", 0) to g +add edge (1, "c", 5) to g +add edge (5, "b", 4) to g +add edge (4, "b", 5) to g let q = "a"^[1..3] . q . "b"^[2..3] | "c" let r1 = for v in [2] return u where u reachable from v in g by q -add edge (0, "d", 5) to g +add edge (5, "d", 6) to g let r2 = for v in [2,3] return u,v where u reachable from v in g by (q . "d") @@ -199,4 +199,20 @@ _____________________________________ ## Task - [ ] Using ANTLR, implement a parser for the language specified above. Namely, implement a function that takes a string and returns a parse tree. - [ ] Implement a function that returns the number of nodes in a given parse tree. -- [ ] Implement a function that, given a parse tree, builds the string that was parsed. +- [ ] Implement a function that, given a parse tree, reconstructs the previously parsed string. +- [ ] Extend CI with a step that generates the parser from the specification. Note that files generated from the specification are not committed to the repository.
+ - [Grammarinator](https://github.com/renatahodovan/grammarinator), which we use for test generation, pulls in [ANTLeRinator](https://github.com/renatahodovan/antlerinator), which can be used to obtain the ANTLR executable + - Alternatively, you can use the more standard [antlr4-tools](https://github.com/antlr/antlr4-tools) + +Required functions: +```python +# The second field indicates whether the string is valid (True if valid) +def prog_to_tree(program: str) -> tuple[ParserRuleContext, bool]: + pass + +def nodes_count(tree: ParserRuleContext) -> int: + pass + +def tree_to_prog(tree: ParserRuleContext) -> str: + pass +``` diff --git a/tests/autotests/ProgramGenerator.py b/tests/autotests/ProgramGenerator.py new file mode 100644 index 000000000..d0ab3eef3 --- /dev/null +++ b/tests/autotests/ProgramGenerator.py @@ -0,0 +1,673 @@ +# Generated by Grammarinator 23.7.post76+gf3ffa71 +# Modified by test authors + +from math import inf +from grammarinator.runtime import * + + +KEYWORDS = { + "let", + "is", + "graph", + "remove", + "vertex", + "edge", + "vertices", + "from", + "add", + "to", + "return", + "where", + "reachable", + "in", + "by", + "for", +} + + +class ProgramGenerator(Generator): + + def prog(self, parent=None): + with UnparserRuleContext(self, "prog", parent) as rule: + current = rule.current + with QuantifierContext( + rule, 0, 1, inf, ProgramGenerator._quant_sizes[1], 0 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + self.stmt(parent=current) + current = rule.current + return current + + def stmt(self, parent=None): + with UnparserRuleContext(self, "stmt", parent) as rule: + current = rule.current + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[1], + 0, + ProgramGenerator._alt_conds[1], + ) as alt0: + current = rule.current + [self.bind, self.add, self.remove, self.declare][alt0()](parent=current) + current = rule.current + return current + + def declare(self, parent=None): + with UnparserRuleContext(self, "declare", parent) as rule: + current = rule.current + self._reserve(3, self.T__0, parent=current) + self._reserve(2, self.VAR, parent=current) + self._reserve(1, self.T__1, parent=current) + self.T__2(parent=current) + return current + + def bind(self, parent=None): + with UnparserRuleContext(self, "bind", parent) as rule: + current = rule.current + self._reserve(3, self.T__0, parent=current) + self._reserve(2, self.VAR, parent=current) + self._reserve(1, self.T__3, parent=current) + self.expr(parent=current) + return current + + def remove(self, parent=None): + with UnparserRuleContext(self, "remove", parent) as rule: + current = rule.current + self._reserve(4, self.T__4, parent=current) + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[2], + 3, + ProgramGenerator._alt_conds[2], + ) as alt0: + current = rule.current + [self.T__5, self.T__6, self.T__7][alt0()](parent=current) + current = rule.current + self._reserve(2, self.expr, parent=current) + self._reserve(1, self.T__8, parent=current) + self.VAR(parent=current) + return current + + def add(self, parent=None): + with UnparserRuleContext(self, "add", parent) as rule: + current = rule.current + self._reserve(4, self.T__9, parent=current) + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[3], + 3, + ProgramGenerator._alt_conds[0], + ) as alt0: + current = rule.current + [self.T__5, self.T__6][alt0()](parent=current) + current = rule.current + self._reserve(2, self.expr, parent=current) +
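For reference, here is one way the required functions above can map onto the antlr4-python3-runtime API. This is a minimal sketch under explicit assumptions, not the repository's implementation: the generated module and class names (`project.GraphLexer`, `project.GraphParser`) are hypothetical and depend on your `.g4` file (which CI would feed to, e.g., the `antlr4` command from antlr4-tools or to ANTLeRinator), and the validity check relies on the `prog` start rule being anchored with `EOF` so unconsumed input counts as a syntax error.

```python
# A minimal sketch, NOT the reference solution. GraphLexer/GraphParser are
# hypothetical names for the ANTLR-generated modules; adjust to your grammar.
from antlr4 import CommonTokenStream, InputStream, ParserRuleContext, Token
from antlr4.tree.Tree import TerminalNode

from project.GraphLexer import GraphLexer  # hypothetical generated module
from project.GraphParser import GraphParser  # hypothetical generated module


def prog_to_tree(program: str) -> tuple[ParserRuleContext, bool]:
    parser = GraphParser(CommonTokenStream(GraphLexer(InputStream(program))))
    tree = parser.prog()
    # The string is valid iff parsing finished without syntax errors
    return tree, parser.getNumberOfSyntaxErrors() == 0


def nodes_count(tree: ParserRuleContext) -> int:
    # The root plus all descendants; terminal nodes report zero children
    return 1 + sum(nodes_count(tree.getChild(i)) for i in range(tree.getChildCount()))


def tree_to_prog(tree: ParserRuleContext) -> str:
    # Rebuild the program from the terminal tokens, space-separated
    # (the same convention as the simple_space_serializer used by the tests)
    if isinstance(tree, TerminalNode):
        return "" if tree.symbol.type == Token.EOF else tree.getText()
    parts = (tree_to_prog(tree.getChild(i)) for i in range(tree.getChildCount()))
    return " ".join(p for p in parts if p)
```

Space-joining the terminals suffices for the round-trip test in test_task11.py below, since that test re-parses the rebuilt program and only compares node counts, and the generated programs are themselves serialized with single spaces.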
self._reserve(1, self.T__10, parent=current) + self.VAR(parent=current) + return current + + def expr(self, parent=None): + with UnparserRuleContext(self, "expr", parent) as rule: + current = rule.current + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[4], + 0, + ProgramGenerator._alt_conds[3], + ) as alt0: + current = rule.current + [ + self.NUM, + self.CHAR, + self.VAR, + self.edge_expr, + self.set_expr, + self.regexp, + self.select, + ][alt0()](parent=current) + current = rule.current + return current + + def set_expr(self, parent=None): + with UnparserRuleContext(self, "set_expr", parent) as rule: + current = rule.current + self._reserve(2, self.T__11, parent=current) + self._reserve(1, self.expr, parent=current) + with QuantifierContext( + rule, 0, 0, inf, ProgramGenerator._quant_sizes[2], 1 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + self._reserve(1, self.T__12, parent=current) + self.expr(parent=current) + current = rule.current + self.T__13(parent=current) + return current + + def edge_expr(self, parent=None): + with UnparserRuleContext(self, "edge_expr", parent) as rule: + current = rule.current + self._reserve(6, self.T__14, parent=current) + self._reserve(5, self.expr, parent=current) + self._reserve(4, self.T__12, parent=current) + self._reserve(3, self.expr, parent=current) + self._reserve(2, self.T__12, parent=current) + self._reserve(1, self.expr, parent=current) + self.T__15(parent=current) + return current + + def regexp(self, parent=None): + with UnparserRuleContext(self, "regexp", parent) as rule: + current = rule.current + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[5], + 0, + ProgramGenerator._alt_conds[3], + ) as alt0: + current = rule.current + choice0 = alt0() + if choice0 == 0: + self.CHAR(parent=current) + elif choice0 == 1: + self.VAR(parent=current) + elif choice0 == 2: + self._reserve(2, self.T__14, parent=current) + self._reserve(1, self.regexp, parent=current) + self.T__15(parent=current) + elif choice0 == 3: + self._reserve(2, self.regexp, parent=current) + self._reserve(1, self.T__16, parent=current) + self.regexp(parent=current) + elif choice0 == 4: + self._reserve(5, self.regexp, parent=current) + self._reserve(4, self.T__17, parent=current) + self.myrange(parent=current) + elif choice0 == 5: + self._reserve(2, self.regexp, parent=current) + self._reserve(1, self.T__18, parent=current) + self.regexp(parent=current) + elif choice0 == 6: + self._reserve(2, self.regexp, parent=current) + self._reserve(1, self.T__19, parent=current) + self.regexp(parent=current) + current = rule.current + return current + + def myrange(self, parent=None): + with UnparserRuleContext(self, "myrange", parent) as rule: + current = rule.current + self._reserve(3, self.T__11, parent=current) + self._reserve(2, self.NUM, parent=current) + self._reserve(1, self.T__20, parent=current) + with QuantifierContext( + rule, 0, 0, 1, ProgramGenerator._quant_sizes[3], 1 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + self.NUM(parent=current) + current = rule.current + self.T__13(parent=current) + return current + + def select(self, parent=None): + with UnparserRuleContext(self, "select", parent) as rule: + current = rule.current + with QuantifierContext( + rule, 0, 0, 1, ProgramGenerator._quant_sizes[1], 11 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + self.v_filter(parent=current) + current = rule.current + 
with QuantifierContext( + rule, 1, 0, 1, ProgramGenerator._quant_sizes[1], 11 + ) as quant1: + while quant1(): + with QuantifiedContext(rule): + current = rule.current + self.v_filter(parent=current) + current = rule.current + self._reserve(10, self.T__21, parent=current) + self._reserve(9, self.VAR, parent=current) + with QuantifierContext( + rule, 2, 0, 1, ProgramGenerator._quant_sizes[4], 9 + ) as quant2: + while quant2(): + with QuantifiedContext(rule): + current = rule.current + self._reserve(1, self.T__12, parent=current) + self.VAR(parent=current) + current = rule.current + self._reserve(8, self.T__22, parent=current) + self._reserve(7, self.VAR, parent=current) + self._reserve(6, self.T__23, parent=current) + self._reserve(5, self.T__8, parent=current) + self._reserve(4, self.VAR, parent=current) + self._reserve(3, self.T__24, parent=current) + self._reserve(2, self.VAR, parent=current) + self._reserve(1, self.T__25, parent=current) + self.expr(parent=current) + return current + + def v_filter(self, parent=None): + with UnparserRuleContext(self, "v_filter", parent) as rule: + current = rule.current + self._reserve(3, self.T__26, parent=current) + self._reserve(2, self.VAR, parent=current) + self._reserve(1, self.T__24, parent=current) + self.expr(parent=current) + return current + + def NUM(self, parent=None): + with UnlexerRuleContext(self, "NUM", parent) as rule: + current = rule.current + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[0], + 0, + ProgramGenerator._alt_conds[0], + ) as alt0: + current = rule.current + choice0 = alt0() + if choice0 == 0: + current.src += "0" + elif choice0 == 1: + current.src += self._model.charset( + current, 0, ProgramGenerator._charsets[1] + ) + with QuantifierContext( + rule, 0, 0, inf, ProgramGenerator._quant_sizes[0], 0 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + current.src += self._model.charset( + current, 1, ProgramGenerator._charsets[2] + ) + current = rule.current + current = rule.current + return current + + def VAR(self, parent=None): + with UnlexerRuleContext(self, "VAR", parent) as rule: + current = rule.current + current.src += self._model.charset( + current, 0, ProgramGenerator._charsets[3] + ) + with QuantifierContext( + rule, 0, 0, inf, ProgramGenerator._quant_sizes[0], 0 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + current.src += self._model.charset( + current, 1, ProgramGenerator._charsets[4] + ) + current = rule.current + if current.src in KEYWORDS: + current.src += self._model.charset( + current, 1, ProgramGenerator._charsets[4] + ) + return current + + def CHAR(self, parent=None): + with UnlexerRuleContext(self, "CHAR", parent) as rule: + current = rule.current + current.src += '"' + current.src += self._model.charset( + current, 0, ProgramGenerator._charsets[3] + ) + current.src += '"' + return current + + def NEWLINE(self, parent=None): + with UnlexerRuleContext(self, "NEWLINE", parent) as rule: + current = rule.current + with QuantifierContext( + rule, 0, 1, inf, ProgramGenerator._quant_sizes[0], 0 + ) as quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + current.src += self._model.charset( + current, 0, ProgramGenerator._charsets[5] + ) + current = rule.current + return current + + def WS(self, parent=None): + with UnlexerRuleContext(self, "WS", parent) as rule: + current = rule.current + with QuantifierContext( + rule, 0, 1, inf, ProgramGenerator._quant_sizes[0], 0 + ) as 
quant0: + while quant0(): + with QuantifiedContext(rule): + current = rule.current + with AlternationContext( + rule, + 0, + ProgramGenerator._alt_sizes[0], + 0, + ProgramGenerator._alt_conds[0], + ) as alt0: + current = rule.current + current.src += ["\t", " "][alt0()] + current = rule.current + current = rule.current + return current + + def T__0(self, parent=None): + with UnlexerRuleContext(self, "T__0", parent) as rule: + current = rule.current + current.src += "let" + return current + + def T__1(self, parent=None): + with UnlexerRuleContext(self, "T__1", parent) as rule: + current = rule.current + current.src += "is" + return current + + def T__2(self, parent=None): + with UnlexerRuleContext(self, "T__2", parent) as rule: + current = rule.current + current.src += "graph" + return current + + def T__3(self, parent=None): + with UnlexerRuleContext(self, "T__3", parent) as rule: + current = rule.current + current.src += "=" + return current + + def T__4(self, parent=None): + with UnlexerRuleContext(self, "T__4", parent) as rule: + current = rule.current + current.src += "remove" + return current + + def T__5(self, parent=None): + with UnlexerRuleContext(self, "T__5", parent) as rule: + current = rule.current + current.src += "vertex" + return current + + def T__6(self, parent=None): + with UnlexerRuleContext(self, "T__6", parent) as rule: + current = rule.current + current.src += "edge" + return current + + def T__7(self, parent=None): + with UnlexerRuleContext(self, "T__7", parent) as rule: + current = rule.current + current.src += "vertices" + return current + + def T__8(self, parent=None): + with UnlexerRuleContext(self, "T__8", parent) as rule: + current = rule.current + current.src += "from" + return current + + def T__9(self, parent=None): + with UnlexerRuleContext(self, "T__9", parent) as rule: + current = rule.current + current.src += "add" + return current + + def T__10(self, parent=None): + with UnlexerRuleContext(self, "T__10", parent) as rule: + current = rule.current + current.src += "to" + return current + + def T__11(self, parent=None): + with UnlexerRuleContext(self, "T__11", parent) as rule: + current = rule.current + current.src += "[" + return current + + def T__12(self, parent=None): + with UnlexerRuleContext(self, "T__12", parent) as rule: + current = rule.current + current.src += "," + return current + + def T__13(self, parent=None): + with UnlexerRuleContext(self, "T__13", parent) as rule: + current = rule.current + current.src += "]" + return current + + def T__14(self, parent=None): + with UnlexerRuleContext(self, "T__14", parent) as rule: + current = rule.current + current.src += "(" + return current + + def T__15(self, parent=None): + with UnlexerRuleContext(self, "T__15", parent) as rule: + current = rule.current + current.src += ")" + return current + + def T__16(self, parent=None): + with UnlexerRuleContext(self, "T__16", parent) as rule: + current = rule.current + current.src += "|" + return current + + def T__17(self, parent=None): + with UnlexerRuleContext(self, "T__17", parent) as rule: + current = rule.current + current.src += "^" + return current + + def T__18(self, parent=None): + with UnlexerRuleContext(self, "T__18", parent) as rule: + current = rule.current + current.src += "." 
+ return current + + def T__19(self, parent=None): + with UnlexerRuleContext(self, "T__19", parent) as rule: + current = rule.current + current.src += "&" + return current + + def T__20(self, parent=None): + with UnlexerRuleContext(self, "T__20", parent) as rule: + current = rule.current + current.src += ".." + return current + + def T__21(self, parent=None): + with UnlexerRuleContext(self, "T__21", parent) as rule: + current = rule.current + current.src += "return" + return current + + def T__22(self, parent=None): + with UnlexerRuleContext(self, "T__22", parent) as rule: + current = rule.current + current.src += "where" + return current + + def T__23(self, parent=None): + with UnlexerRuleContext(self, "T__23", parent) as rule: + current = rule.current + current.src += "reachable" + return current + + def T__24(self, parent=None): + with UnlexerRuleContext(self, "T__24", parent) as rule: + current = rule.current + current.src += "in" + return current + + def T__25(self, parent=None): + with UnlexerRuleContext(self, "T__25", parent) as rule: + current = rule.current + current.src += "by" + return current + + def T__26(self, parent=None): + with UnlexerRuleContext(self, "T__26", parent) as rule: + current = rule.current + current.src += "for" + return current + + _default_rule = prog + + _immutable_rules = ( + "T__0", + "T__1", + "T__10", + "T__11", + "T__12", + "T__13", + "T__14", + "T__15", + "T__16", + "T__17", + "T__18", + "T__19", + "T__2", + "T__20", + "T__21", + "T__22", + "T__23", + "T__24", + "T__25", + "T__26", + "T__3", + "T__4", + "T__5", + "T__6", + "T__7", + "T__8", + "T__9", + ) + + _rule_sizes = { + "prog": RuleSize(3, 4), + "stmt": RuleSize(2, 4), + "declare": RuleSize(1, 4), + "bind": RuleSize(2, 4), + "remove": RuleSize(2, 5), + "add": RuleSize(2, 5), + "expr": RuleSize(1, 1), + "set_expr": RuleSize(2, 3), + "edge_expr": RuleSize(2, 7), + "regexp": RuleSize(1, 1), + "myrange": RuleSize(1, 4), + "select": RuleSize(2, 11), + "v_filter": RuleSize(2, 4), + "NUM": RuleSize(0, 0), + "VAR": RuleSize(0, 0), + "CHAR": RuleSize(0, 0), + "NEWLINE": RuleSize(0, 0), + "WS": RuleSize(0, 0), + "T__0": RuleSize(0, 0), + "T__1": RuleSize(0, 0), + "T__2": RuleSize(0, 0), + "T__3": RuleSize(0, 0), + "T__4": RuleSize(0, 0), + "T__5": RuleSize(0, 0), + "T__6": RuleSize(0, 0), + "T__7": RuleSize(0, 0), + "T__8": RuleSize(0, 0), + "T__9": RuleSize(0, 0), + "T__10": RuleSize(0, 0), + "T__11": RuleSize(0, 0), + "T__12": RuleSize(0, 0), + "T__13": RuleSize(0, 0), + "T__14": RuleSize(0, 0), + "T__15": RuleSize(0, 0), + "T__16": RuleSize(0, 0), + "T__17": RuleSize(0, 0), + "T__18": RuleSize(0, 0), + "T__19": RuleSize(0, 0), + "T__20": RuleSize(0, 0), + "T__21": RuleSize(0, 0), + "T__22": RuleSize(0, 0), + "T__23": RuleSize(0, 0), + "T__24": RuleSize(0, 0), + "T__25": RuleSize(0, 0), + "T__26": RuleSize(0, 0), + } + + _alt_sizes = ( + (RuleSize(0, 0), RuleSize(0, 0)), # 0 + (RuleSize(3, 4), RuleSize(3, 5), RuleSize(3, 5), RuleSize(2, 4)), # 1 + (RuleSize(1, 1), RuleSize(1, 1), RuleSize(1, 1)), # 2 + (RuleSize(1, 1), RuleSize(1, 1)), # 3 + ( + RuleSize(1, 1), + RuleSize(1, 1), + RuleSize(1, 1), + RuleSize(3, 7), + RuleSize(3, 3), + RuleSize(2, 1), + RuleSize(3, 11), + ), # 4 + ( + RuleSize(1, 1), + RuleSize(1, 1), + RuleSize(2, 3), + RuleSize(2, 3), + RuleSize(2, 6), + RuleSize(2, 3), + RuleSize(2, 3), + ), # 5 + ) + + _alt_conds = ( + (1, 1), # 0 + (1, 1, 1, 1), # 1 + (1, 1, 1), # 2 + (1, 1, 1, 1, 1, 1, 1), # 3 + ) + + _quant_sizes = ( + RuleSize(0, 0), # 0 + RuleSize(3, 4), # 1 + RuleSize(2, 2), # 2 
+ RuleSize(1, 1), # 3 + RuleSize(1, 2), # 4 + ) + + _charsets = ( + Generator._charset(((0x20, 0x7F),)), # 0 + Generator._charset(((0x31, 0x3A),)), # 1 + Generator._charset(((0x30, 0x3A),)), # 2 + Generator._charset(((0x61, 0x7B),)), # 3 + Generator._charset( + ( + (0x30, 0x3A), + (0x5F, 0x60), + (0x61, 0x7B), + ) + ), # 4 + Generator._charset( + ( + (0xA, 0xB), + (0xD, 0xE), + (0x20, 0x21), + ) + ), # 5 + ) diff --git a/tests/autotests/constants.py b/tests/autotests/constants.py new file mode 100644 index 000000000..905ecef16 --- /dev/null +++ b/tests/autotests/constants.py @@ -0,0 +1,8 @@ +LABEL: str = "label" +IS_FINAL: str = "is_final" +IS_START: str = "is_start" +REGEXP = "regexp" +CFG = "cfg" +EBNF = "ebnf" + +LABELS: list[str] = ["a", "b", "c", "d", "e", "f", "g", "h"] diff --git a/tests/autotests/fixtures.py b/tests/autotests/fixtures.py new file mode 100644 index 000000000..7df24c3e2 --- /dev/null +++ b/tests/autotests/fixtures.py @@ -0,0 +1,14 @@ +import pytest +from helper import generate_rnd_graph, generate_rnd_dense_graph +from networkx import MultiDiGraph +from constants import LABELS +import random + +funcs = [generate_rnd_dense_graph, generate_rnd_graph] + + +@pytest.fixture(scope="function", params=range(8)) +def graph(request) -> MultiDiGraph: + fun = random.choice(funcs) + # keep graph sizes in [1, 40]: task 6 takes too long when sizes range up to 100 + return fun(1, 40, LABELS) diff --git a/tests/autotests/grammars_constants.py b/tests/autotests/grammars_constants.py new file mode 100644 index 000000000..aee8e8b60 --- /dev/null +++ b/tests/autotests/grammars_constants.py @@ -0,0 +1,332 @@ +from pyformlang.cfg import cfg +from constants import * + +GRAMMARS_TABLE: list[dict[str, list[str | cfg.CFG]]] = [ + { + REGEXP: ["(a | b | c)*(d | e | f)*"], + CFG: [], + EBNF: ["S -> (a | b | c)*(d | e | f)*"], + }, + {REGEXP: ["(a b) | (a c)"], CFG: [], EBNF: ["S -> (a b) | (a c)"]}, + {REGEXP: ["a b c*"], CFG: [], EBNF: ["S -> a b c*"]}, + {REGEXP: ["(a b d) | (a b c)"], CFG: [], EBNF: ["S -> (a b d) | (a b c)"]}, + { + REGEXP: ["(a|b|c|d|e)(a|b|c|d|e)*"], + CFG: [], + EBNF: ["S -> (a|b|c|d|e)(a|b|c|d|e)*"], + }, + {REGEXP: ["(a|b)*(c|d)*"], CFG: [], EBNF: ["S -> (a|b)*(c|d)*"]}, + {REGEXP: ["(a|b|c|d|e)f*"], CFG: [], EBNF: ["S -> (a|b|c|d|e)f*"]}, + {REGEXP: ["a a"], CFG: [], EBNF: ["S -> a a"]}, + {REGEXP: ["a b*"], CFG: [], EBNF: ["S -> a b*"]}, + {REGEXP: ["a b"], CFG: [], EBNF: ["S -> a b"]}, + {REGEXP: ["(a b) | (a b c)"], CFG: [], EBNF: ["S -> (a b) | (a b c)"]}, + {REGEXP: ["a|c"], CFG: [], EBNF: ["S -> a|c"]}, + {REGEXP: ["(a|c)(b|d)"], CFG: [], EBNF: ["S -> (a|c)(b|d)"]}, + {REGEXP: ["b"], CFG: [cfg.CFG.from_text("S -> b")], EBNF: ["S -> b"]}, + {REGEXP: ["a*a*b"], CFG: [], EBNF: ["S -> a*a*b"]}, + { + REGEXP: ["((a | b)*c)*((d | e)*f)*"], + CFG: [], + EBNF: ["S -> ((a | b)*c)*((d | e)*f)*"], + }, + {REGEXP: ["((a b d) | (a b c))*"], CFG: [], EBNF: ["S -> ((a b d) | (a b c))*"]}, + {REGEXP: ["(a|c)*"], CFG: [], EBNF: ["S -> (a|c)*"]}, + {REGEXP: ["(a | c)*(a | b)*"], CFG: [], EBNF: ["S -> (a | c)*(a | b)*"]}, + { + REGEXP: ["(a | b)*(c | d)*(e | f)*"], + CFG: [], + EBNF: ["S -> (a | b)*(c | d)*(e | f)*"], + }, + { + REGEXP: ["a*(a | b)*", "(a|b)*", "a* | (a | b)*"], + CFG: [], + EBNF: ["S -> a*(a | b)*"], + }, + {REGEXP: ["(a b d)* | (a b c)*"], CFG: [], EBNF: ["S -> (a b d)* | (a b c)*"]}, + {REGEXP: ["a b* c"], CFG: [], EBNF: ["S -> a b* c"]}, + {REGEXP: ["(a a)*"], CFG: [], EBNF: ["S -> (a a)*"]}, + {REGEXP: ["((a|b)*c)*"], CFG: [], EBNF: ["S -> ((a|b)*c)*"]}, + {REGEXP: ["a b c d"], CFG:
[], EBNF: ["S -> a b c d"]}, + { + REGEXP: [ + "b b b", + ], + CFG: [], + EBNF: [ + "S -> b b b", + ], + }, + {REGEXP: ["(a b d*) | (a b c*)"], CFG: [], EBNF: ["S -> (a b d*) | (a b c*)"]}, + { + REGEXP: ["a", "a | a"], + CFG: [ + cfg.CFG.from_text("S -> a"), + cfg.CFG.from_text( + """ + S -> N B + B -> $ + N -> a + """ + ), + ], + EBNF: ["S -> a"], + }, + { + REGEXP: ["a*", "a* a*", "a* | a"], + CFG: [ + cfg.CFG.from_text("S -> $ | a S"), + cfg.CFG.from_text("S -> $ | S S | a"), + cfg.CFG.from_text("S -> S a S | $"), + ], + EBNF: ["S -> a*"], + }, + { + REGEXP: ["a b c"], + CFG: [ + cfg.CFG.from_text("S -> a b c"), + cfg.CFG.from_text( + """ + S -> a B + B -> b c + """ + ), + ], + EBNF: ["S -> a b c"], + }, + { + REGEXP: ["a*b*"], + CFG: [ + cfg.CFG.from_text( + """ + S -> S1 S2 + S2 -> $ | b S2 + S1 -> $ | a S1 + """ + ), + cfg.CFG.from_text( + """ + S -> $ | S1 | a S + S1 -> $ | b S1 + """ + ), + ], + EBNF: ["S -> a*b*"], + }, + { + REGEXP: ["(a b)*"], + CFG: [ + cfg.CFG.from_text("S -> $ | a b S"), + cfg.CFG.from_text( + """ + S -> $ | S S1 + S1 -> a b + """ + ), + ], + EBNF: ["S -> (a b)*"], + }, + { + REGEXP: ["a b*c*"], + CFG: [ + cfg.CFG.from_text( + """ + S -> S1 S2 S3 + S1 -> a + S2 -> $ | S2 b + S3 -> $ | c S3 + """ + ), + cfg.CFG.from_text( + """ + S -> a S2 S3 + S2 -> S2 b | $ + S3 -> c | $ | S3 S3 + """ + ), + ], + EBNF: ["S -> a b*c*"], + }, + { + REGEXP: ["(a|b|c|d|e)*"], + CFG: [ + cfg.CFG.from_text( + """ + S -> $ | S1 S + S1 -> a | b | c | d | e + """ + ), + cfg.CFG.from_text("S -> $ | a | b | c | d | e | S S"), + cfg.CFG.from_text("S -> $ | a S | b S | c S | e S | d S"), + ], + EBNF: ["S -> (a|b|c|d|e)*"], + }, + { + REGEXP: ["((a | b) * c)*(d | e)"], + CFG: [ + cfg.CFG.from_text( + """ + S -> S1 S2 + S1 -> S1 S1 | $ | S3 c + S2 -> d | e + S3 -> b S3 | $ | a S3 + """ + ), + cfg.CFG.from_text( + """ + S -> S1 d | S1 e + S1 -> S1 S3 c | $ + S3 -> b S3 | $ | a S3 + """ + ), + ], + EBNF: ["S -> ((a | b) * c)*(d | e)"], + }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text("S -> $ | a S b | S S"), + cfg.CFG.from_text("S -> $ | a S b S"), + cfg.CFG.from_text("S -> $ | S a S b"), + cfg.CFG.from_text("S -> $ | a S b | S S S"), + ], + EBNF: ["S -> $ | a S b | S S"], + }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text("S -> $ | a S b | c S d | S S"), + cfg.CFG.from_text("S -> $ | a S b S | c S d S"), + cfg.CFG.from_text("S -> $ | S a S b | S c S d"), + cfg.CFG.from_text("S -> $ | a S b | c S d S | S S S"), + ], + EBNF: ["S -> $ | a S b | c S d | S S"], + }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text( + """ + S -> $ | S1 S S2 | S S + S1 -> a | c + S2 -> b | d + """ + ), + cfg.CFG.from_text( + """ + S -> $ | S1 S S2 S + S1 -> a | c + S2 -> b | d + """ + ), + cfg.CFG.from_text("S -> $ | S a S b | S a S d | S c S d | S c S b"), + cfg.CFG.from_text( + """ + S -> $ | S1 S S2 | S S S + S1 -> a | c + S2-> b | d + """ + ), + ], + EBNF: ["S -> $ | S a S b | S a S d | S c S d | S c S b"], + }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text( + """ + S -> S S | Se S1 Se + Se -> $ | Se e + S1 -> $ | a S1 b + """ + ), + cfg.CFG.from_text( + """ + S -> S1 | S S | e + S1 -> $ | a S1 b + """ + ), + cfg.CFG.from_text( + """ + S -> S2 S | $ + S2 -> e | S1 + S1 -> $ | a S1 b + """ + ), + cfg.CFG.from_text( + """ + S -> $ | S1 S | e S + S1 -> $ | a S1 b + """ + ), + ], + EBNF: [ + """ + S -> S1 | S S | e + S1 -> $ | a S1 b + """ + ], + }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text("S -> a S | $"), + cfg.CFG.from_text( + """ + S -> S1 | a + S1 -> a S1 | $ + """ + ), + ], + EBNF: ["S -> a S | $"], 
+ }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text( + """ + S -> S1 | S2 + S1 -> Sab | S1 c + Sab -> $ | a Sab b + S2 -> Sbc | a S2 + Sbc -> $ | b Sbc c + """ + ) + ], + EBNF: [ + """ + S -> ( Sab c* ) | ( a* Sbc ) | $ + Sab -> a Sab b | $ + Sbc -> b Sbc c | $ + """ + ], + }, + { + REGEXP: [], + CFG: [cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S")], + EBNF: ["S -> a | b | (S ( c | d ) S ) | ( e S f ) | ( g S )"], + }, + { + REGEXP: [], + CFG: [ + cfg.CFG.from_text( + "S -> $ | a S b | b S a | e S f | S S | c S d | d S c | f S e" + ), + ], + EBNF: [ + "S -> ( ( a S b ) | ( b S a ) | ( c S d ) | ( d S c ) | ( e S f ) | (f S e) )*" + ], + }, +] + +REGEXP_CFG: list[tuple[str, list[cfg.CFG]]] = [ + (regexp, ds[CFG]) for ds in GRAMMARS_TABLE for regexp in ds[REGEXP] +] +GRAMMARS: list[list[cfg.CFG]] = [ds[CFG] for ds in GRAMMARS_TABLE if len(ds[CFG]) > 1] +GRAMMARS_DIFFERENT: list[cfg.CFG] = [ + ds[CFG][0] for ds in GRAMMARS_TABLE if len(ds[CFG]) >= 1 +] +CFG_EBNF: list[tuple[list[cfg.CFG], list[str]]] = [ + (ds[CFG], ds[EBNF]) for ds in GRAMMARS_TABLE +] +REGEXES = [regex_str for ds in GRAMMARS_TABLE for regex_str in ds[REGEXP]] diff --git a/tests/autotests/helper.py b/tests/autotests/helper.py new file mode 100644 index 000000000..128f4d2f1 --- /dev/null +++ b/tests/autotests/helper.py @@ -0,0 +1,109 @@ +import random +import cfpq_data +import copy +from networkx import MultiDiGraph +import itertools +import networkx as nx +from constants import * + + +def generate_rnd_graph( + min_size: int, max_size: int, labels: list[str] +) -> nx.MultiDiGraph: + n_of_nodes = random.randint(min_size, max_size) + return cfpq_data.graphs.labeled_scale_free_graph(n_of_nodes, labels=labels) + + +def generate_rnd_dense_graph( + min_size: int, max_size: int, labels: list[str] +) -> MultiDiGraph: + n_of_nodes = random.randint(min_size, max_size) + return cfpq_data.graphs.labeled_binomial_graph(n=n_of_nodes, p=0.4, labels=labels) + + +def generate_rnd_start_and_final(graph: nx.MultiDiGraph) -> tuple[set[int], set[int]]: + start_nodes = set( + random.choices( + list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) + ) + ) + final_nodes = set( + random.choices( + list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) + ) + ) + + for node, data in graph.nodes(data=True): + if node in start_nodes: + data[IS_START] = True + if node in final_nodes: + data[IS_FINAL] = True + return start_nodes, final_nodes + + +def rpq_dict_to_set(rpq: dict[int, set[int]]) -> set[tuple[int, int]]: + rpq_set = set() + for node_from, nodes_to in rpq.items(): + for node_to in nodes_to: + rpq_set.add((node_from, node_to)) + return rpq_set + + +class GraphWordsHelper: + graph = None + final_nodes = None + transitive_closure = None + start_nodes = None + + def __init__(self, graph: MultiDiGraph): + self.graph = graph.copy() + self.final_nodes = { + node for node, data in self.graph.nodes(data=IS_FINAL) if data + } + self.start_nodes = { + node for node, data in self.graph.nodes(data=IS_START) if data + } + self.transitive_closure: nx.MultiDiGraph = nx.transitive_closure( + copy.deepcopy(self.graph), reflexive=False + ) + + def is_reachable(self, source, target): + return target in self.transitive_closure[source].keys() + + def _exists_any_final_path(self, node): + return any( + self.is_reachable(node, final_node) for final_node in self.final_nodes + ) + + def _take_a_step(self, node): + for node_to, edge_dict in dict(self.graph[node]).items(): + for edge_data in edge_dict.values(): + yield node_to, 
edge_data[LABEL] + + def _is_final_node(self, node): + return node in self.final_nodes + + def generate_words_by_node(self, node): + queue = [(node, [])] + while len(queue) != 0: + (n, word) = queue.pop(0) + for node_to, label in self._take_a_step(n): + tmp = word.copy() + tmp.append(label) + if self._is_final_node(node_to): + yield tmp.copy() + if self._exists_any_final_path(node_to): + queue.append((node_to, tmp.copy())) + + def take_n_words_by_node(self, node, n): + if self._exists_any_final_path(node): + return list(itertools.islice(self.generate_words_by_node(node), 0, n)) + return [] + + def get_words_with_limiter(self, limiter: int) -> list[str]: + result = list() + for start in self.start_nodes: + result.extend(self.take_n_words_by_node(start, limiter)) + if start in self.final_nodes: + result.append([]) + return result diff --git a/tests/autotests/program_examples/example_1.ac36fe75aed14aaf9dbf5a095d1e0f54.grtp b/tests/autotests/program_examples/example_1.ac36fe75aed14aaf9dbf5a095d1e0f54.grtp new file mode 100644 index 000000000..71fc36e4a Binary files /dev/null and b/tests/autotests/program_examples/example_1.ac36fe75aed14aaf9dbf5a095d1e0f54.grtp differ diff --git a/tests/autotests/program_examples/example_10.dd8eda2a68bd4f7eacca6db35984e5ef.grtp b/tests/autotests/program_examples/example_10.dd8eda2a68bd4f7eacca6db35984e5ef.grtp new file mode 100644 index 000000000..5a74f1e20 Binary files /dev/null and b/tests/autotests/program_examples/example_10.dd8eda2a68bd4f7eacca6db35984e5ef.grtp differ diff --git a/tests/autotests/program_examples/example_11.99692263ce9f4a27ae06db82880dd5ed.grtp b/tests/autotests/program_examples/example_11.99692263ce9f4a27ae06db82880dd5ed.grtp new file mode 100644 index 000000000..df968042e Binary files /dev/null and b/tests/autotests/program_examples/example_11.99692263ce9f4a27ae06db82880dd5ed.grtp differ diff --git a/tests/autotests/program_examples/example_12.cb66f2c27dce427cb3ef857a033c04de.grtp b/tests/autotests/program_examples/example_12.cb66f2c27dce427cb3ef857a033c04de.grtp new file mode 100644 index 000000000..9a0e7ade5 Binary files /dev/null and b/tests/autotests/program_examples/example_12.cb66f2c27dce427cb3ef857a033c04de.grtp differ diff --git a/tests/autotests/program_examples/example_13.3b5c36a9a11240f798443665eabbc39e.grtp b/tests/autotests/program_examples/example_13.3b5c36a9a11240f798443665eabbc39e.grtp new file mode 100644 index 000000000..3b9d4b757 Binary files /dev/null and b/tests/autotests/program_examples/example_13.3b5c36a9a11240f798443665eabbc39e.grtp differ diff --git a/tests/autotests/program_examples/example_14.a44edfd713cb429b8ef60dbe3e2a3499.grtp b/tests/autotests/program_examples/example_14.a44edfd713cb429b8ef60dbe3e2a3499.grtp new file mode 100644 index 000000000..cc6f9a13f Binary files /dev/null and b/tests/autotests/program_examples/example_14.a44edfd713cb429b8ef60dbe3e2a3499.grtp differ diff --git a/tests/autotests/program_examples/example_15.759a2056fea6465e913deafa2acf6735.grtp b/tests/autotests/program_examples/example_15.759a2056fea6465e913deafa2acf6735.grtp new file mode 100644 index 000000000..8406eb839 Binary files /dev/null and b/tests/autotests/program_examples/example_15.759a2056fea6465e913deafa2acf6735.grtp differ diff --git a/tests/autotests/program_examples/example_16.2a29e19d5635406e89e88e89d062a1d2.grtp b/tests/autotests/program_examples/example_16.2a29e19d5635406e89e88e89d062a1d2.grtp new file mode 100644 index 000000000..cefb74e4d Binary files /dev/null and 
b/tests/autotests/program_examples/example_16.2a29e19d5635406e89e88e89d062a1d2.grtp differ diff --git a/tests/autotests/program_examples/example_2.10027d27b1fe44d1a5f7b67e36e1be4d.grtp b/tests/autotests/program_examples/example_2.10027d27b1fe44d1a5f7b67e36e1be4d.grtp new file mode 100644 index 000000000..95ac573ee Binary files /dev/null and b/tests/autotests/program_examples/example_2.10027d27b1fe44d1a5f7b67e36e1be4d.grtp differ diff --git a/tests/autotests/program_examples/example_3.6b5d6506d9ee4e06b8f1fe7a6df4f408.grtp b/tests/autotests/program_examples/example_3.6b5d6506d9ee4e06b8f1fe7a6df4f408.grtp new file mode 100644 index 000000000..0d1835e46 Binary files /dev/null and b/tests/autotests/program_examples/example_3.6b5d6506d9ee4e06b8f1fe7a6df4f408.grtp differ diff --git a/tests/autotests/program_examples/example_4.812eb6125e1347ac8a2337d43be8f4e8.grtp b/tests/autotests/program_examples/example_4.812eb6125e1347ac8a2337d43be8f4e8.grtp new file mode 100644 index 000000000..5b5df7bb3 Binary files /dev/null and b/tests/autotests/program_examples/example_4.812eb6125e1347ac8a2337d43be8f4e8.grtp differ diff --git a/tests/autotests/program_examples/example_5.26caac5c8604430abdd5dbdf49358f02.grtp b/tests/autotests/program_examples/example_5.26caac5c8604430abdd5dbdf49358f02.grtp new file mode 100644 index 000000000..f99382e7d Binary files /dev/null and b/tests/autotests/program_examples/example_5.26caac5c8604430abdd5dbdf49358f02.grtp differ diff --git a/tests/autotests/program_examples/example_6.a7391a181bdb4cb59fc172fe760becb1.grtp b/tests/autotests/program_examples/example_6.a7391a181bdb4cb59fc172fe760becb1.grtp new file mode 100644 index 000000000..b90392cdb Binary files /dev/null and b/tests/autotests/program_examples/example_6.a7391a181bdb4cb59fc172fe760becb1.grtp differ diff --git a/tests/autotests/program_examples/example_7.06818e72bb014b47b3e4dcd1457bcdae.grtp b/tests/autotests/program_examples/example_7.06818e72bb014b47b3e4dcd1457bcdae.grtp new file mode 100644 index 000000000..de01fce0c Binary files /dev/null and b/tests/autotests/program_examples/example_7.06818e72bb014b47b3e4dcd1457bcdae.grtp differ diff --git a/tests/autotests/program_examples/example_8.518b39be43534596b088aa151b2dc1f9.grtp b/tests/autotests/program_examples/example_8.518b39be43534596b088aa151b2dc1f9.grtp new file mode 100644 index 000000000..69e48a0ee Binary files /dev/null and b/tests/autotests/program_examples/example_8.518b39be43534596b088aa151b2dc1f9.grtp differ diff --git a/tests/autotests/program_examples/example_9.100fd4999b124566afdd7b0da134d840.grtp b/tests/autotests/program_examples/example_9.100fd4999b124566afdd7b0da134d840.grtp new file mode 100644 index 000000000..34d3d1b14 Binary files /dev/null and b/tests/autotests/program_examples/example_9.100fd4999b124566afdd7b0da134d840.grtp differ diff --git a/tests/autotests/rpq_template_test.py b/tests/autotests/rpq_template_test.py new file mode 100644 index 000000000..9e8a96585 --- /dev/null +++ b/tests/autotests/rpq_template_test.py @@ -0,0 +1,67 @@ +from copy import deepcopy +from helper import generate_rnd_start_and_final, rpq_dict_to_set +from networkx import MultiDiGraph +from pyformlang.cfg import CFG +from pyformlang.rsa import RecursiveAutomaton +from typing import Callable, Iterable + +try: + from project.task2 import graph_to_nfa, regex_to_dfa + from project.task3 import FiniteAutomaton + from project.task4 import reachability_with_constraints +except ImportError: + pass + + +def rpq_cfpq_test( + graph: MultiDiGraph, + regex_str: str, + cfg_list: 
Iterable[CFG], + function: Callable[[CFG, MultiDiGraph, set[int], set[int]], set[tuple[int, int]]], +) -> None: + start_nodes, final_nodes = generate_rnd_start_and_final(graph) + for cf_gram in cfg_list: + cfpq: set[tuple[int, int]] = function( + cf_gram, deepcopy(graph), start_nodes, final_nodes + ) + rpq: set[tuple[int, int]] = rpq_dict_to_set( + reachability_with_constraints( + FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)), + FiniteAutomaton(regex_to_dfa(regex_str)), + ) + ) + assert cfpq == rpq + + +def different_grammars_test( + graph: MultiDiGraph, + eq_grammars: Iterable[CFG], + function: Callable[[CFG, MultiDiGraph, set[int], set[int]], set[tuple[int, int]]], +) -> None: + start_nodes, final_nodes = generate_rnd_start_and_final(graph) + eq_cfpqs = [ + function(cf_gram, deepcopy(graph), start_nodes, final_nodes) + for cf_gram in eq_grammars + ] + assert eq_cfpqs.count(eq_cfpqs[0]) == len(eq_cfpqs) + + +def cfpq_algorithm_test( + graph: MultiDiGraph, + ebnf_list: Iterable[str], + cfg_list: Iterable[CFG], + ebnf_to_rsm: Callable[[str], RecursiveAutomaton], + cfg_to_rsm: Callable[[CFG], RecursiveAutomaton], + function: Callable[ + [RecursiveAutomaton, MultiDiGraph, set[int], set[int]], set[tuple[int, int]] + ], +) -> None: + start_nodes, final_nodes = generate_rnd_start_and_final(graph) + rsm_list = [] + rsm_list.extend(ebnf_to_rsm(ebnf) for ebnf in ebnf_list) + rsm_list.extend(cfg_to_rsm(cfg) for cfg in cfg_list) + eq_cfpqs = [ + function((deepcopy(rsm)), deepcopy(graph), start_nodes, final_nodes) + for rsm in rsm_list + ] + assert eq_cfpqs.count(eq_cfpqs[0]) == len(eq_cfpqs) diff --git a/tests/autotests/test_task11.py b/tests/autotests/test_task11.py new file mode 100644 index 000000000..e9c397c95 --- /dev/null +++ b/tests/autotests/test_task11.py @@ -0,0 +1,88 @@ +# This file contains test cases that you need to pass to get a grade +# You MUST NOT touch anything here except ONE block below +# You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it +# Otherwise, please report it +import inspect +import io +from contextlib import redirect_stdout + +import pytest +import re +from grammarinator.runtime import RuleSize, simple_space_serializer +from grammarinator.tool import ( + PickleTreeCodec, + GeneratorTool, + DefaultGeneratorFactory, + DefaultPopulation, +) + +import ProgramGenerator + +# Fix import statements in try block to run tests +try: + from project.task11 import prog_to_tree, nodes_count, tree_to_prog +except ImportError: + pytestmark = pytest.mark.skip("Task 11 is not ready to test!") + + +# This fixture probably uses an internal API, so it may break at any time :( +@pytest.fixture(scope="module") +def generator(request) -> GeneratorTool: + return GeneratorTool( + generator_factory=DefaultGeneratorFactory( + ProgramGenerator.ProgramGenerator, + model_class=None, + cooldown=0.2, + weights=None, + lock=None, + listener_classes=None, + ), + rule="prog", + out_format="", + limit=RuleSize(depth=20, tokens=RuleSize.max.tokens), + population=( + DefaultPopulation( + "tests/autotests/program_examples/", "grtp", PickleTreeCodec() + ) + if True + else None + ), + generate=True, + mutate=True, + recombine=True, + keep_trees=False, + transformers=[], + serializer=simple_space_serializer, + cleanup=False, + encoding="utf-8", + errors="strict", + dry_run=False, + ) + + +@pytest.fixture(params=range(100)) +def program(generator, request) -> str: + # Grammarinator's API cannot return a plain string; it can only write to a file or to
stdout + # So we catch stdout + with io.StringIO() as buf, redirect_stdout(buf): + with generator: + generator.create(request.param) + out = buf.getvalue() + return out + + +class TestParser: + def test_id(self, program: str): + tree_before, is_valid = prog_to_tree(program) + assert is_valid + program_after = tree_to_prog(tree_before) + tree_after, is_valid_after = prog_to_tree(program_after) + assert is_valid_after + assert nodes_count(tree_before) == nodes_count(tree_after) + + def test_wrong(self, program: str): + reg = re.compile("[=,]", re.X) + if reg.search(program): + program_bad = reg.sub("", program) + _, is_valid_bad = prog_to_tree(program_bad) + assert not is_valid_bad diff --git a/tests/autotests/test_task2.py b/tests/autotests/test_task2.py index 2eb989eef..d352635fb 100644 --- a/tests/autotests/test_task2.py +++ b/tests/autotests/test_task2.py @@ -2,15 +2,16 @@ # You MUST NOT touch anything here except ONE block below # You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it # Otherwise, please report it -import copy - -import pyformlang.finite_automaton +from pyformlang.finite_automaton import NondeterministicFiniteAutomaton from networkx import MultiDiGraph from pyformlang.regular_expression import Regex import pytest import random import itertools -import networkx as nx +from helper import GraphWordsHelper, generate_rnd_start_and_final +from constants import IS_FINAL, IS_START +from fixtures import graph +from grammars_constants import REGEXES # Fix import statements in try block to run tests try: @@ -18,26 +19,9 @@ except ImportError: pytestmark = pytest.mark.skip("Task 2 is not ready to test!") -REGEX_TO_TEST = [ - "(aa)*", - "a | a", - "a* | a", - "(ab) | (ac)", - "(ab) | (abc)", - "(abd) | (abc)", - "(abd*) | (abc*)", - "(abd)* | (abc)*", - "((abd) | (abc))*", - "a*a*", - "a*a*b", - "a* | (a | b)*", - "a*(a | b)*", - "(a | c)*(a | b)*", -] - class TestRegexToDfa: - @pytest.mark.parametrize("regex_str", REGEX_TO_TEST, ids=lambda regex: regex) + @pytest.mark.parametrize("regex_str", REGEXES) def test(self, regex_str: str) -> None: regex = Regex(regex_str) regex_cfg = regex.to_cfg() @@ -60,109 +44,16 @@ def test(self, regex_str: str) -> None: assert dfa.accepts(word) -LABELS = ["a", "b", "c", "x", "y", "z", "alpha", "beta", "gamma"] -LABEL = "label" -IS_FINAL = "is_final" -IS_START = "is_start" - - -class GraphWordsHelper: - graph = None - final_nodes = None - transitive_closure = None - start_nodes = None - - def __init__(self, graph: MultiDiGraph): - self.graph = graph.copy() - self.final_nodes = list( - map(lambda x: x[0], filter(lambda y: y[1], self.graph.nodes(data=IS_FINAL))) - ) - self.start_nodes = list( - map(lambda x: x[0], filter(lambda y: y[1], self.graph.nodes(data=IS_START))) - ) - self.transitive_closure: nx.MultiDiGraph = nx.transitive_closure( - copy.deepcopy(self.graph), reflexive=False - ) - - def is_reachable(self, source, target): - return target in self.transitive_closure[source].keys() - - def _exists_any_final_path(self, node): - for final_node in self.final_nodes: - if self.is_reachable(node, final_node): - return True - return False - - def _take_a_step(self, node): - for node_to, edge_dict in dict(self.graph[node]).items(): - for edge_data in edge_dict.values(): - yield node_to, edge_data[LABEL] - - def _is_final_node(self, node): - return node in self.final_nodes - - def generate_words_by_node(self, node): - queue = [(node, [])] - while len(queue) != 0: - (n, word) = queue.pop(0) - for node_to, label in 
self._take_a_step(n): - tmp = word.copy() - tmp.append(label) - if self._is_final_node(node_to): - yield tmp.copy() - if self._exists_any_final_path(node_to): - queue.append((node_to, tmp.copy())) - - def take_n_words_by_node(self, node, n): - if self._exists_any_final_path(node): - return list(itertools.islice(self.generate_words_by_node(node), 0, n)) - return [] - - def get_words_with_limiter(self, limiter: int) -> list[str]: - result = list() - for start in self.start_nodes: - result.extend(self.take_n_words_by_node(start, limiter)) - if start in self.final_nodes: - result.append([]) - return result - - -@pytest.fixture(scope="class", params=range(8)) -def graph(request) -> MultiDiGraph: - n_of_nodes = random.randint(1, 20) - graph = nx.scale_free_graph(n_of_nodes) - - for _, _, data in graph.edges(data=True): - data[LABEL] = random.choice(LABELS) - for _, data in graph.nodes(data=True): - data[IS_FINAL] = False - data[IS_START] = False - return graph - - class TestGraphToNfa: def test_random_start_and_final( self, graph: MultiDiGraph, ) -> None: - start_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - final_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) + copy_graph = graph.copy() + start_nodes, final_nodes = generate_rnd_start_and_final(graph) + nfa: NondeterministicFiniteAutomaton = graph_to_nfa( + copy_graph, start_nodes.copy(), final_nodes.copy() ) - nfa: pyformlang.finite_automaton.NondeterministicFiniteAutomaton = graph_to_nfa( - graph.copy(), start_nodes.copy(), final_nodes.copy() - ) - for node, data in graph.nodes(data=True): - if node in start_nodes: - data[IS_START] = True - if node in final_nodes: - data[IS_FINAL] = True words_helper = GraphWordsHelper(graph) words = words_helper.get_words_with_limiter(random.randint(10, 100)) if len(words) == 0: @@ -172,9 +63,7 @@ def test_random_start_and_final( assert nfa.accepts(word) def test_not_specified_start_and_final(self, graph: MultiDiGraph) -> None: - nfa: pyformlang.finite_automaton.NondeterministicFiniteAutomaton = graph_to_nfa( - graph.copy(), set(), set() - ) + nfa: NondeterministicFiniteAutomaton = graph_to_nfa(graph.copy(), set(), set()) for _, data in graph.nodes(data=True): data[IS_FINAL] = True data[IS_START] = True diff --git a/tests/autotests/test_task3.py b/tests/autotests/test_task3.py index 5a04f6b28..970300b55 100644 --- a/tests/autotests/test_task3.py +++ b/tests/autotests/test_task3.py @@ -2,13 +2,12 @@ # You MUST NOT touch anything here except ONE block below # You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it # Otherwise, please report it -import pyformlang.finite_automaton -from networkx import MultiDiGraph from pyformlang.regular_expression import Regex +from pyformlang.cfg import CFG import pytest import random import itertools -import networkx as nx +from grammars_constants import REGEXES # Fix import statements in try block to run tests try: @@ -17,30 +16,10 @@ except ImportError: pytestmark = pytest.mark.skip("Task 3 is not ready to test!") -REGEX_TO_TEST = [ - ("a", "b"), - ("a", "a"), - ("a*", "a"), - ("a*", "aa"), - ("a*", "a*"), - ("(aa)*", "a*"), - ("(a|b)*", "a*"), - ("(a|b)*", "b"), - ("(a|b)*", "bbb"), - ("a|b", "a"), - ("a|b", "a|c"), - ("(a|b)(c|d)", "(a|c)(b|d)"), - ("(a|b)*", "(a|c)*"), - ("a*b*", "(a|b)*"), - ("(ab)*", "(a|b)*"), -] - class TestIntersect: @pytest.mark.parametrize( - "regex_str1, regex_str2", - REGEX_TO_TEST, - ids=lambda 
regex_tuple: regex_tuple, + "regex_str1, regex_str2", itertools.combinations(REGEXES, 2) ) def test(self, regex_str1: str, regex_str2: str) -> None: dfa1 = FiniteAutomaton(regex_to_dfa(regex_str1)) @@ -49,8 +28,8 @@ def test(self, regex_str1: str, regex_str2: str) -> None: regex1: Regex = Regex(regex_str1) regex2: Regex = Regex(regex_str2) - cfg_of_regex1: pyformlang.cfg.CFG = regex1.to_cfg() - intersect_cfg: pyformlang.cfg.CFG = cfg_of_regex1.intersection(regex2) + cfg_of_regex1: CFG = regex1.to_cfg() + intersect_cfg: CFG = cfg_of_regex1.intersection(regex2) words = intersect_cfg.get_words() if intersect_cfg.is_finite(): all_word_parts = list(words) diff --git a/tests/autotests/test_task4.py b/tests/autotests/test_task4.py index ecf895992..4f4c06f3c 100644 --- a/tests/autotests/test_task4.py +++ b/tests/autotests/test_task4.py @@ -4,10 +4,10 @@ # Otherwise, please report it import random from copy import deepcopy - -import cfpq_data import pytest -from networkx import MultiDiGraph +from grammars_constants import REGEXES +from helper import generate_rnd_start_and_final, rpq_dict_to_set +from fixtures import graph # Fix import statements in try block to run tests try: @@ -17,80 +17,24 @@ except ImportError: pytestmark = pytest.mark.skip("Task 4 is not ready to test!") -QUERIES = [ - "a", - "a*", - "ab", - "abc", - "abcd", - "a*b*", - "(ab)*", - "ab*", - "ab*c*", - "ab*c", - "abc*", - "(a|b|c|d|e)*", - "(a|b|c|d|e)(a|b|c|d|e)*", - "(a|b|c|d|e)f*", - "(a|b)*", - "(a|b)*(c|d)*", - "(a | b)*(c | d)*(e | f)*", - "(a | b | c)*(d | e | f)*", - "((a|b)*c)*", - "((a | b) * c)*(d | e)", - "((a | b)*c)*((d | e)*f)*", -] -LABELS = ["a", "b", "c", "d", "e", "f", "g", "h"] - -LABEL = "label" -IS_FINAL = "is_final" -IS_START = "is_start" - - -@pytest.fixture(scope="class", params=range(5)) -def graph(request) -> MultiDiGraph: - n_of_nodes = random.randint(20, 40) - return cfpq_data.graphs.generators.labeled_scale_free_graph( - n_of_nodes, labels=LABEL - ) - @pytest.fixture(scope="class", params=range(5)) def query(request) -> str: - return random.choice(QUERIES) + return random.choice(REGEXES) class TestReachability: def test(self, graph, query) -> None: - start_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - final_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) + start_nodes, final_nodes = generate_rnd_start_and_final(graph.copy()) fa = FiniteAutomaton( graph_to_nfa(deepcopy(graph), deepcopy(start_nodes), deepcopy(final_nodes)) ) constraint_fa = FiniteAutomaton(regex_to_dfa(query)) - reachable: dict = reachability_with_constraints( - deepcopy(fa), deepcopy(constraint_fa) + reachable = rpq_dict_to_set( + reachability_with_constraints(deepcopy(fa), deepcopy(constraint_fa)) ) - reachable = {k: v for k, v in reachable.items() if len(v) != 0} ends = paths_ends( deepcopy(graph), deepcopy(start_nodes), deepcopy(final_nodes), query ) - assert len(set(reachable.keys())) == len(set(map(lambda x: x[0], ends))) - - equivalency_flag = True - for start, final in ends: - if start in reachable.keys() and final in reachable[start]: - continue - else: - equivalency_flag = False - break - assert equivalency_flag + assert set(ends) == reachable diff --git a/tests/autotests/test_task6.py b/tests/autotests/test_task6.py index d3f0dbcf1..92b32efcd 100644 --- a/tests/autotests/test_task6.py +++ b/tests/autotests/test_task6.py @@ -2,14 +2,10 @@ # You MUST NOT touch anything here except ONE block below # You CAN 
modify this file IF AND ONLY IF you have found a bug and are willing to fix it # Otherwise, please report it -import itertools -import random -from copy import deepcopy -import cfpq_data as cd -import networkx as nx import pytest -from networkx import MultiDiGraph -from pyformlang import cfg +from grammars_constants import REGEXP_CFG, GRAMMARS +from rpq_template_test import rpq_cfpq_test, different_grammars_test +from fixtures import graph # Fix import statements in try block to run tests try: @@ -20,146 +16,15 @@ except ImportError: pytestmark = pytest.mark.skip("Task 6 is not ready to test!") -REGEXP_CFG: dict[str, list[cfg.CFG]] = { - "a": [cfg.CFG.from_text("S -> a"), cfg.CFG.from_text("S -> N B\nB -> $\nN -> a")], - "a*": [ - cfg.CFG.from_text("S -> $ | a S"), - cfg.CFG.from_text("S -> $ | S S | a"), - cfg.CFG.from_text("S -> S a S | $"), - ], - "a b c": [cfg.CFG.from_text("S -> a b c"), cfg.CFG.from_text("S -> a B\nB -> b c")], - "a*b*": [ - cfg.CFG.from_text("S -> S1 S2\nS2 -> $ | b S2\nS1 -> $ | a S1"), - cfg.CFG.from_text("S -> $ | S1 | a S\nS1 -> $ | b S1"), - ], - "(a b)*": [ - cfg.CFG.from_text("S -> $ | a b S"), - cfg.CFG.from_text("S -> $ | S S1\nS1 -> a b"), - ], - "a b*c*": [ - cfg.CFG.from_text("S -> S1 S2 S3\nS1 -> a\nS2 -> $ | S2 b\nS3 -> $ | c S3"), - cfg.CFG.from_text("S -> a S2 S3\nS2 -> S2 b | $\nS3 -> c | $ | S3 S3"), - ], - "(a|b|c|d|e)*": [ - cfg.CFG.from_text("S -> $ | S1 S\nS1 -> a | b | c | d | e"), - cfg.CFG.from_text("S -> $ | a | b | c | d | e | S S"), - cfg.CFG.from_text("S -> $ | a S | b S | c S | e S | d S"), - ], - "((a | b) * c)*(d | e)": [ - cfg.CFG.from_text( - "S -> S1 S2\nS1 -> S1 S1 | $ | S3 c\n S2 -> d | e\n S3 -> b S3 | $ | a S3" - ), - cfg.CFG.from_text("S -> S1 d | S1 e\nS1 -> S1 S3 c | $\nS3 -> b S3 | $ | a S3"), - ], -} - -GRAMMARS = [ - [ - cfg.CFG.from_text("S -> $ | a S b | S S"), - cfg.CFG.from_text("S -> $ | a S b S"), - cfg.CFG.from_text("S -> $ | S a S b"), - cfg.CFG.from_text("S -> $ | a S b | S S S"), - ], - [ - cfg.CFG.from_text("S -> $ | a S b | c S d | S S"), - cfg.CFG.from_text("S -> $ | a S b S | c S d S"), - cfg.CFG.from_text("S -> $ | S a S b | S c S d"), - cfg.CFG.from_text("S -> $ | a S b | c S d S | S S S"), - ], - [ - cfg.CFG.from_text("S -> $ | S1 S S2\nS1 -> a | c\n S2 -> b | d\n S -> S S"), - cfg.CFG.from_text("S -> $ | S1 S S2 S\n S1 -> a | c\nS2 -> b | d"), - cfg.CFG.from_text("S -> $ | S a S b | S a S d | S c S d | S c S b"), - cfg.CFG.from_text("S -> $ | S1 S S2 | S S S\nS1 -> a | c\nS2-> b | d"), - ], - [ - cfg.CFG.from_text("S -> S S | Se S1 Se\nSe -> $ | Se e\nS1 -> $ | a S1 b"), - cfg.CFG.from_text("S -> S1 | S S | e\nS1 -> $ | a S1 b"), - cfg.CFG.from_text("S -> S2 S | $\n S2 -> e | S1\n S1 -> $ | a S1 b"), - cfg.CFG.from_text("S -> $ | S1 S | e S\n S1 -> $ | a S1 b"), - ], - [ - cfg.CFG.from_text("S -> a S | $"), - cfg.CFG.from_text("S -> S1 | a\nS1 -> a S1 | $"), - ], -] - -LABELS = ["a", "b", "c", "d", "e", "f", "g", "h"] - -LABEL = "label" -IS_FINAL = "is_final" -IS_START = "is_start" - - -@pytest.fixture(scope="function", params=range(5)) -def graph(request) -> MultiDiGraph: - n_of_nodes = random.randint(20, 40) - return cd.graphs.labeled_scale_free_graph(n_of_nodes, labels=LABELS) - class TestReachability: - @pytest.mark.parametrize( - "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda regexp_cfgs: regexp_cfgs - ) - def test_rpq_cfpq(self, graph, regex_str, cfg_list) -> None: - start_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - final_nodes 
= set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - - for node, data in graph.nodes(data=True): - if node in start_nodes: - data[IS_START] = True - if node in final_nodes: - data[IS_FINAL] = True - - for cf_gram in cfg_list: - cfpq: set[tuple[int, int]] = cfpq_with_hellings( - cf_gram, deepcopy(graph), start_nodes, final_nodes - ) - rpq: dict[int, set[int]] = reachability_with_constraints( - FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)), - FiniteAutomaton(regex_to_dfa(regex_str)), - ) - rpq_set = set() - for node_from, nodes_to in rpq.items(): - for node_to in nodes_to: - rpq_set.add((node_from, node_to)) - assert cfpq == rpq_set - - @pytest.mark.parametrize("eq_grammars", GRAMMARS, ids=lambda grammars: grammars) - def test_different_grammars(self, graph, eq_grammars): - start_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - final_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - - for node, data in graph.nodes(data=True): - if node in start_nodes: - data[IS_START] = True - if node in final_nodes: - data[IS_FINAL] = True + @pytest.mark.parametrize("regex_str, cfg_list", REGEXP_CFG) + def test_rpq_cfpq_hellings(self, graph, regex_str, cfg_list): + rpq_cfpq_test(graph, regex_str, cfg_list, cfpq_with_hellings) - eq_cfpqs = [ - cfpq_with_hellings( - deepcopy(cf_gram), deepcopy(graph), start_nodes, final_nodes - ) - for cf_gram in eq_grammars - ] - for a, b in itertools.combinations(eq_cfpqs, 2): - assert a == b + @pytest.mark.parametrize("eq_grammars", GRAMMARS) + def test_different_grammars_hellings(self, graph, eq_grammars): + different_grammars_test(graph, eq_grammars, cfpq_with_hellings) def test_cfg_to_weak_normal_form_exists(): diff --git a/tests/autotests/test_task7.py b/tests/autotests/test_task7.py index 85adf709d..78ab7db7e 100644 --- a/tests/autotests/test_task7.py +++ b/tests/autotests/test_task7.py @@ -2,14 +2,12 @@ # You MUST NOT touch anything here except ONE block below # You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it # Otherwise, please report it -import itertools -import random from copy import deepcopy -import cfpq_data as cd -import networkx as nx import pytest -from networkx import MultiDiGraph -from pyformlang import cfg +from grammars_constants import REGEXP_CFG, GRAMMARS, GRAMMARS_DIFFERENT +from helper import generate_rnd_start_and_final +from rpq_template_test import rpq_cfpq_test, different_grammars_test +from fixtures import graph # Fix import statements in try block to run tests try: @@ -21,145 +19,17 @@ except ImportError: pytestmark = pytest.mark.skip("Task 7 is not ready to test!") -REGEXP_CFG: dict[str, list[cfg.CFG]] = { - "a": [cfg.CFG.from_text("S -> a"), cfg.CFG.from_text("S -> N B\nB -> $\nN -> a")], - "a*": [ - cfg.CFG.from_text("S -> $ | a S"), - cfg.CFG.from_text("S -> $ | S S | a"), - cfg.CFG.from_text("S -> S a S | $"), - ], - "a b c": [cfg.CFG.from_text("S -> a b c"), cfg.CFG.from_text("S -> a B\nB -> b c")], - "a*b*": [ - cfg.CFG.from_text("S -> S1 S2\nS2 -> $ | b S2\nS1 -> $ | a S1"), - cfg.CFG.from_text("S -> $ | S1 | a S\nS1 -> $ | b S1"), - ], - "(a b)*": [ - cfg.CFG.from_text("S -> $ | a b S"), - cfg.CFG.from_text("S -> $ | S S1\nS1 -> a b"), - ], - "a b*c*": [ - cfg.CFG.from_text("S -> S1 S2 S3\nS1 -> a\nS2 -> $ | S2 b\nS3 -> $ | c S3"), - cfg.CFG.from_text("S -> a S2 S3\nS2 -> S2 b | $\nS3 -> c | $ | S3 S3"), - ], - 
"(a|b|c|d|e)*": [ - cfg.CFG.from_text("S -> $ | S1 S\nS1 -> a | b | c | d | e"), - cfg.CFG.from_text("S -> $ | a | b | c | d | e | S S"), - cfg.CFG.from_text("S -> $ | a S | b S | c S | e S | d S"), - ], - "((a | b) * c)*(d | e)": [ - cfg.CFG.from_text( - "S -> S1 S2\nS1 -> S1 S1 | $ | S3 c\n S2 -> d | e\n S3 -> b S3 | $ | a S3" - ), - cfg.CFG.from_text("S -> S1 d | S1 e\nS1 -> S1 S3 c | $\nS3 -> b S3 | $ | a S3"), - ], -} - -GRAMMARS = [ - [ - cfg.CFG.from_text("S -> $ | a S b | S S"), - cfg.CFG.from_text("S -> $ | a S b S"), - cfg.CFG.from_text("S -> $ | S a S b"), - cfg.CFG.from_text("S -> $ | a S b | S S S"), - ], - [ - cfg.CFG.from_text("S -> $ | a S b | c S d | S S"), - cfg.CFG.from_text("S -> $ | a S b S | c S d S"), - cfg.CFG.from_text("S -> $ | S a S b | S c S d"), - cfg.CFG.from_text("S -> $ | a S b | c S d S | S S S"), - ], - [ - cfg.CFG.from_text("S -> $ | S1 S S2\nS1 -> a | c\n S2 -> b | d\n S -> S S"), - cfg.CFG.from_text("S -> $ | S1 S S2 S\n S1 -> a | c\nS2 -> b | d"), - cfg.CFG.from_text("S -> $ | S a S b | S a S d | S c S d | S c S b"), - cfg.CFG.from_text("S -> $ | S1 S S2 | S S S\nS1 -> a | c\nS2-> b | d"), - ], - [ - cfg.CFG.from_text("S -> S S | Se S1 Se\nSe -> $ | Se e\nS1 -> $ | a S1 b"), - cfg.CFG.from_text("S -> S1 | S S | e\nS1 -> $ | a S1 b"), - cfg.CFG.from_text("S -> S2 S | $\n S2 -> e | S1\n S1 -> $ | a S1 b"), - cfg.CFG.from_text("S -> $ | S1 S | e S\n S1 -> $ | a S1 b"), - ], - [ - cfg.CFG.from_text("S -> a S | $"), - cfg.CFG.from_text("S -> S1 | a\nS1 -> a S1 | $"), - ], -] - -GRAMMARS_DIFFERENT = [ - cfg.CFG.from_text( - "S -> S1 | S2\nS1 -> Sab | S1 c\nSab -> $ | a Sab b\nS2 -> Sbc | a S2\nSbc -> b Sbc c" - ), - cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S"), - cfg.CFG.from_text("S -> $ | a S b | b S a | e S f | S S | c S d | f S c | f S e"), -] - -LABELS = ["a", "b", "c", "d", "e", "f", "g", "h"] - -LABEL = "label" -IS_FINAL = "is_final" -IS_START = "is_start" - - -def generate_rnd_start_and_final(graph): - start_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - final_nodes = set( - random.choices( - list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes)) - ) - ) - - for node, data in graph.nodes(data=True): - if node in start_nodes: - data[IS_START] = True - if node in final_nodes: - data[IS_FINAL] = True - return start_nodes, final_nodes - - -@pytest.fixture(scope="function", params=range(5)) -def graph(request) -> MultiDiGraph: - n_of_nodes = random.randint(20, 40) - return cd.graphs.labeled_scale_free_graph(n_of_nodes, labels=LABELS) - class TestReachabilityMatrixAlgorithm: - @pytest.mark.parametrize( - "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda regexp_cfgs: regexp_cfgs - ) + @pytest.mark.parametrize("regex_str, cfg_list", REGEXP_CFG) def test_rpq_cfpq_matrix(self, graph, regex_str, cfg_list) -> None: - start_nodes, final_nodes = generate_rnd_start_and_final(graph) - - for cf_gram in cfg_list: - cfpq: set[tuple[int, int]] = cfpq_with_matrix( - cf_gram, deepcopy(graph), start_nodes, final_nodes - ) - rpq: dict[int, set[int]] = reachability_with_constraints( - FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)), - FiniteAutomaton(regex_to_dfa(regex_str)), - ) - rpq_set = set() - for node_from, nodes_to in rpq.items(): - for node_to in nodes_to: - rpq_set.add((node_from, node_to)) - assert cfpq == rpq_set + rpq_cfpq_test(graph, regex_str, cfg_list, cfpq_with_matrix) - @pytest.mark.parametrize("eq_grammars", GRAMMARS, ids=lambda grammars: grammars) 
- def test_different_grammars(self, graph, eq_grammars): - start_nodes, final_nodes = generate_rnd_start_and_final(graph) - eq_cfpqs = [ - cfpq_with_matrix( - deepcopy(cf_gram), deepcopy(graph), start_nodes, final_nodes - ) - for cf_gram in eq_grammars - ] - for a, b in itertools.combinations(eq_cfpqs, 2): - assert a == b + @pytest.mark.parametrize("eq_grammars", GRAMMARS) + def test_different_grammars_matrix(self, graph, eq_grammars): + different_grammars_test(graph, eq_grammars, cfpq_with_matrix) - @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT, ids=lambda g: g) + @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT) def test_hellings_matrix(self, graph, grammar): start_nodes, final_nodes = generate_rnd_start_and_final(graph) hellings = cfpq_with_hellings( diff --git a/tests/autotests/test_task8.py b/tests/autotests/test_task8.py index aebf0eab4..9cd44710c 100644 --- a/tests/autotests/test_task8.py +++ b/tests/autotests/test_task8.py @@ -2,14 +2,16 @@ # You MUST NOT touch anything here except ONE block below # You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it # Otherwise, please report it -import itertools -import random from copy import deepcopy -import cfpq_data as cd -import networkx as nx import pytest -from networkx import MultiDiGraph -from pyformlang import cfg, rsa +from grammars_constants import REGEXP_CFG, GRAMMARS, GRAMMARS_DIFFERENT, CFG_EBNF +from helper import generate_rnd_start_and_final +from rpq_template_test import ( + rpq_cfpq_test, + different_grammars_test, + cfpq_algorithm_test, +) +from fixtures import graph # Fix import statements in try block to run tests try: @@ -22,153 +24,23 @@ except ImportError: pytestmark = pytest.mark.skip("Task 8 is not ready to test!") -REGEXP_CFG: dict[str, list[cfg.CFG]] = { - "a": [cfg.CFG.from_text("S -> a"), cfg.CFG.from_text("S -> N B\nB -> $\nN -> a")], - "a*": [ - cfg.CFG.from_text("S -> $ | a S"), - cfg.CFG.from_text("S -> $ | S S | a"), - cfg.CFG.from_text("S -> S a S | $"), - ], - "a b c": [cfg.CFG.from_text("S -> a b c"), cfg.CFG.from_text("S -> a B\nB -> b c")], - "a*b*": [ - cfg.CFG.from_text("S -> S1 S2\nS2 -> $ | b S2\nS1 -> $ | a S1"), - cfg.CFG.from_text("S -> $ | S1 | a S\nS1 -> $ | b S1"), - ], - "(a b)*": [ - cfg.CFG.from_text("S -> $ | a b S"), - cfg.CFG.from_text("S -> $ | S S1\nS1 -> a b"), - ], - "a b*c*": [ - cfg.CFG.from_text("S -> S1 S2 S3\nS1 -> a\nS2 -> $ | S2 b\nS3 -> $ | c S3"), - cfg.CFG.from_text("S -> a S2 S3\nS2 -> S2 b | $\nS3 -> c | $ | S3 S3"), - ], - "(a|b|c|d|e)*": [ - cfg.CFG.from_text("S -> $ | S1 S\nS1 -> a | b | c | d | e"), - cfg.CFG.from_text("S -> $ | a | b | c | d | e | S S"), - cfg.CFG.from_text("S -> $ | a S | b S | c S | e S | d S"), - ], - "((a | b) * c)*(d | e)": [ - cfg.CFG.from_text( - "S -> S1 S2\nS1 -> S1 S1 | $ | S3 c\n S2 -> d | e\n S3 -> b S3 | $ | a S3" - ), - cfg.CFG.from_text("S -> S1 d | S1 e\nS1 -> S1 S3 c | $\nS3 -> b S3 | $ | a S3"), - ], -} - -GRAMMARS = [ - [ - cfg.CFG.from_text("S -> $ | a S b | S S"), - cfg.CFG.from_text("S -> $ | a S b S"), - cfg.CFG.from_text("S -> $ | S a S b"), - cfg.CFG.from_text("S -> $ | a S b | S S S"), - ], - [ - cfg.CFG.from_text("S -> $ | a S b | c S d | S S"), - cfg.CFG.from_text("S -> $ | a S b S | c S d S"), - cfg.CFG.from_text("S -> $ | S a S b | S c S d"), - cfg.CFG.from_text("S -> $ | a S b | c S d S | S S S"), - ], - [ - cfg.CFG.from_text("S -> $ | S1 S S2\nS1 -> a | c\n S2 -> b | d\n S -> S S"), - cfg.CFG.from_text("S -> $ | S1 S S2 S\n S1 -> a | c\nS2 -> b | d"), - 
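The new `fixtures.py` and `helper.py` modules are likewise not shown in this excerpt; their contents can be read off the fixture and helper deleted above almost verbatim. A sketch, where only the split across the two files is an assumption:

```python
# Hypothetical sketch of tests/autotests/fixtures.py and tests/autotests/helper.py
# -- NOT part of this patch. Bodies are the fixture and helper deleted above.
import random

import cfpq_data as cd
import pytest
from networkx import MultiDiGraph

LABELS = ["a", "b", "c", "d", "e", "f", "g", "h"]

IS_FINAL = "is_final"
IS_START = "is_start"


# fixtures.py: five random labeled scale-free graphs (20-40 nodes) per test.
@pytest.fixture(scope="function", params=range(5))
def graph(request) -> MultiDiGraph:
    n_of_nodes = random.randint(20, 40)
    return cd.graphs.labeled_scale_free_graph(n_of_nodes, labels=LABELS)


# helper.py: pick random non-empty start/final sets and mark them on the graph.
def generate_rnd_start_and_final(graph):
    start_nodes = set(
        random.choices(
            list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes))
        )
    )
    final_nodes = set(
        random.choices(
            list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes))
        )
    )

    for node, data in graph.nodes(data=True):
        if node in start_nodes:
            data[IS_START] = True
        if node in final_nodes:
            data[IS_FINAL] = True
    return start_nodes, final_nodes
```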
diff --git a/tests/autotests/test_task8.py b/tests/autotests/test_task8.py
index aebf0eab4..9cd44710c 100644
--- a/tests/autotests/test_task8.py
+++ b/tests/autotests/test_task8.py
@@ -2,14 +2,16 @@
 # You MUST NOT touch anything here except ONE block below
 # You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it
 # Otherwise, please report it
-import itertools
-import random
 from copy import deepcopy
-import cfpq_data as cd
-import networkx as nx
 import pytest
-from networkx import MultiDiGraph
-from pyformlang import cfg, rsa
+from grammars_constants import REGEXP_CFG, GRAMMARS, GRAMMARS_DIFFERENT, CFG_EBNF
+from helper import generate_rnd_start_and_final
+from rpq_template_test import (
+    rpq_cfpq_test,
+    different_grammars_test,
+    cfpq_algorithm_test,
+)
+from fixtures import graph

 # Fix import statements in try block to run tests
 try:
@@ -22,153 +24,23 @@
 except ImportError:
     pytestmark = pytest.mark.skip("Task 8 is not ready to test!")

-REGEXP_CFG: dict[str, list[cfg.CFG]] = {
-    "a": [cfg.CFG.from_text("S -> a"), cfg.CFG.from_text("S -> N B\nB -> $\nN -> a")],
-    "a*": [
-        cfg.CFG.from_text("S -> $ | a S"),
-        cfg.CFG.from_text("S -> $ | S S | a"),
-        cfg.CFG.from_text("S -> S a S | $"),
-    ],
-    "a b c": [cfg.CFG.from_text("S -> a b c"), cfg.CFG.from_text("S -> a B\nB -> b c")],
-    "a*b*": [
-        cfg.CFG.from_text("S -> S1 S2\nS2 -> $ | b S2\nS1 -> $ | a S1"),
-        cfg.CFG.from_text("S -> $ | S1 | a S\nS1 -> $ | b S1"),
-    ],
-    "(a b)*": [
-        cfg.CFG.from_text("S -> $ | a b S"),
-        cfg.CFG.from_text("S -> $ | S S1\nS1 -> a b"),
-    ],
-    "a b*c*": [
-        cfg.CFG.from_text("S -> S1 S2 S3\nS1 -> a\nS2 -> $ | S2 b\nS3 -> $ | c S3"),
-        cfg.CFG.from_text("S -> a S2 S3\nS2 -> S2 b | $\nS3 -> c | $ | S3 S3"),
-    ],
-    "(a|b|c|d|e)*": [
-        cfg.CFG.from_text("S -> $ | S1 S\nS1 -> a | b | c | d | e"),
-        cfg.CFG.from_text("S -> $ | a | b | c | d | e | S S"),
-        cfg.CFG.from_text("S -> $ | a S | b S | c S | e S | d S"),
-    ],
-    "((a | b) * c)*(d | e)": [
-        cfg.CFG.from_text(
-            "S -> S1 S2\nS1 -> S1 S1 | $ | S3 c\n S2 -> d | e\n S3 -> b S3 | $ | a S3"
-        ),
-        cfg.CFG.from_text("S -> S1 d | S1 e\nS1 -> S1 S3 c | $\nS3 -> b S3 | $ | a S3"),
-    ],
-}
-
-GRAMMARS = [
-    [
-        cfg.CFG.from_text("S -> $ | a S b | S S"),
-        cfg.CFG.from_text("S -> $ | a S b S"),
-        cfg.CFG.from_text("S -> $ | S a S b"),
-        cfg.CFG.from_text("S -> $ | a S b | S S S"),
-    ],
-    [
-        cfg.CFG.from_text("S -> $ | a S b | c S d | S S"),
-        cfg.CFG.from_text("S -> $ | a S b S | c S d S"),
-        cfg.CFG.from_text("S -> $ | S a S b | S c S d"),
-        cfg.CFG.from_text("S -> $ | a S b | c S d S | S S S"),
-    ],
-    [
-        cfg.CFG.from_text("S -> $ | S1 S S2\nS1 -> a | c\n S2 -> b | d\n S -> S S"),
-        cfg.CFG.from_text("S -> $ | S1 S S2 S\n S1 -> a | c\nS2 -> b | d"),
-        cfg.CFG.from_text("S -> $ | S a S b | S a S d | S c S d | S c S b"),
-        cfg.CFG.from_text("S -> $ | S1 S S2 | S S S\nS1 -> a | c\nS2-> b | d"),
-    ],
-    [
-        cfg.CFG.from_text("S -> S S | Se S1 Se\nSe -> $ | Se e\nS1 -> $ | a S1 b"),
-        cfg.CFG.from_text("S -> S1 | S S | e\nS1 -> $ | a S1 b"),
-        cfg.CFG.from_text("S -> S2 S | $\n S2 -> e | S1\n S1 -> $ | a S1 b"),
-        cfg.CFG.from_text("S -> $ | S1 S | e S\n S1 -> $ | a S1 b"),
-    ],
-    [
-        cfg.CFG.from_text("S -> a S | $"),
-        cfg.CFG.from_text("S -> S1 | a\nS1 -> a S1 | $"),
-    ],
-]
-
-GRAMMARS_DIFFERENT = [
-    cfg.CFG.from_text(
-        "S -> S1 | S2\nS1 -> Sab | S1 c\nSab -> $ | a Sab b\nS2 -> Sbc | a S2\nSbc -> b Sbc c"
-    ),
-    cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S"),
-    cfg.CFG.from_text("S -> $ | a S b | b S a | e S f | S S | c S d | f S c | f S e"),
-]
-
-EBNF_GRAMMARS = [
-    """S -> ( Sab c* ) | ( a* Sbc )
-    Sab -> a ( Sab | $ ) b
-    Sbc -> b ( Sbc | $ ) c""",
-    "S -> a | b | (S ( c | d ) S ) | ( e S f ) | ( g S )",
-    "S -> ( ( a S b ) | ( b S a ) | ( c S d ) | ( d S c ) | ( e S f ) | (f S e) )*",
-]
-
-LABELS = ["a", "b", "c", "d", "e", "f", "g", "h"]
-
-LABEL = "label"
-IS_FINAL = "is_final"
-IS_START = "is_start"
-
-
-def generate_rnd_start_and_final(graph):
-    start_nodes = set(
-        random.choices(
-            list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes))
-        )
-    )
-    final_nodes = set(
-        random.choices(
-            list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes))
-        )
-    )
-
-    for node, data in graph.nodes(data=True):
-        if node in start_nodes:
-            data[IS_START] = True
-        if node in final_nodes:
-            data[IS_FINAL] = True
-    return start_nodes, final_nodes
-
-
-@pytest.fixture(scope="function", params=range(5))
-def graph(request) -> MultiDiGraph:
-    n_of_nodes = random.randint(20, 40)
-    return cd.graphs.labeled_scale_free_graph(n_of_nodes, labels=LABELS)
-

 class TestReachabilityTensorAlgorithm:
-    @pytest.mark.parametrize(
-        "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda regexp_cfgs: regexp_cfgs
-    )
+    @pytest.mark.parametrize("regex_str, cfg_list", REGEXP_CFG)
     def test_rpq_cfpq_tensor(self, graph, regex_str, cfg_list) -> None:
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-
-        for cf_gram in cfg_list:
-            cfpq: set[tuple[int, int]] = cfpq_with_tensor(
-                cfg_to_rsm(cf_gram), deepcopy(graph), start_nodes, final_nodes
-            )
-            rpq: dict[int, set[int]] = reachability_with_constraints(
-                FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)),
-                FiniteAutomaton(regex_to_dfa(regex_str)),
-            )
-            rpq_set = set()
-            for node_from, nodes_to in rpq.items():
-                for node_to in nodes_to:
-                    rpq_set.add((node_from, node_to))
-            assert cfpq == rpq_set
+        rpq_cfpq_test(graph, regex_str, cfg_list, cfpq_with_tensor)

-    @pytest.mark.parametrize("eq_grammars", GRAMMARS, ids=lambda grammars: grammars)
+    @pytest.mark.parametrize("eq_grammars", GRAMMARS)
     def test_different_grammars(self, graph, eq_grammars):
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-        eq_cfpqs = [
-            cfpq_with_tensor(
-                cfg_to_rsm(deepcopy(cf_gram)), deepcopy(graph), start_nodes, final_nodes
-            )
-            for cf_gram in eq_grammars
-        ]
-        for a, b in itertools.combinations(eq_cfpqs, 2):
-            assert a == b
+        different_grammars_test(graph, eq_grammars, cfpq_with_tensor)

-    @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT, ids=lambda g: g)
+    @pytest.mark.parametrize("cfg_list, ebnf_list", CFG_EBNF)
+    def test_cfpq_tensor(self, graph, cfg_list, ebnf_list):
+        cfpq_algorithm_test(
+            graph, ebnf_list, cfg_list, ebnf_to_rsm, cfg_to_rsm, cfpq_with_tensor
+        )
+
+    @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT)
     def test_hellings_matrix_tensor(self, graph, grammar):
         start_nodes, final_nodes = generate_rnd_start_and_final(graph)
         hellings = cfpq_with_hellings(
@@ -181,40 +53,3 @@ def test_hellings_matrix_tensor(self, graph, grammar):
             cfg_to_rsm(deepcopy(grammar)), deepcopy(graph), start_nodes, final_nodes
         )
         assert (hellings == matrix) and (matrix == tensor)
-
-    @pytest.mark.parametrize(
-        "cfg_grammar, ebnf_grammar",
-        (zip(GRAMMARS_DIFFERENT, EBNF_GRAMMARS)),
-        ids=lambda t: t,
-    )
-    def test_ebnf_cfg(self, graph, cfg_grammar, ebnf_grammar):
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-        cfg_cfpq = cfpq_with_tensor(
-            cfg_to_rsm(cfg_grammar), deepcopy(graph), start_nodes, final_nodes
-        )
-        ebnf_cfpq = cfpq_with_tensor(
-            ebnf_to_rsm(ebnf_grammar), deepcopy(graph), start_nodes, final_nodes
-        )
-        assert ebnf_cfpq == cfg_cfpq
-
-    @pytest.mark.parametrize(
-        "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda regexp_cfgs: regexp_cfgs
-    )
-    def test_cfpq_tensor(self, graph, regex_str, cfg_list):
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-        eq_cfpqs = [
-            cfpq_with_tensor(
-                cfg_to_rsm(deepcopy(cf_gram)), deepcopy(graph), start_nodes, final_nodes
-            )
-            for cf_gram in cfg_list
-        ]
-        eq_cfpqs.append(
-            cfpq_with_tensor(
-                ebnf_to_rsm(f"S -> {regex_str}"),
-                deepcopy(graph),
-                start_nodes,
-                final_nodes,
-            )
-        )
-        for a, b in itertools.combinations(eq_cfpqs, 2):
-            assert a == b
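`cfpq_algorithm_test` is likewise only visible through its call sites. A plausible sketch, merging the checks of the deleted `test_ebnf_cfg` and `test_cfpq_tensor`; the argument order comes from the calls above, the body is an assumption:

```python
# Hypothetical sketch of cfpq_algorithm_test from rpq_template_test.py -- NOT
# part of this patch. Every CFG in cfg_list and every EBNF text in ebnf_list is
# expected to describe the same language, so all of them must produce one and
# the same reachability set on the same start/final nodes.
import itertools
from copy import deepcopy

from helper import generate_rnd_start_and_final


def cfpq_algorithm_test(graph, ebnf_list, cfg_list, ebnf_to_rsm, cfg_to_rsm, algorithm):
    start_nodes, final_nodes = generate_rnd_start_and_final(graph)
    # RSMs built from the CFG variants...
    cfpqs = [
        algorithm(cfg_to_rsm(deepcopy(gr)), deepcopy(graph), start_nodes, final_nodes)
        for gr in cfg_list
    ]
    # ...and from the EBNF variants must all agree pairwise.
    cfpqs += [
        algorithm(ebnf_to_rsm(ebnf), deepcopy(graph), start_nodes, final_nodes)
        for ebnf in ebnf_list
    ]
    for a, b in itertools.combinations(cfpqs, 2):
        assert a == b
```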
diff --git a/tests/autotests/test_task9.py b/tests/autotests/test_task9.py
index e9411a2e9..3bdfac5c3 100644
--- a/tests/autotests/test_task9.py
+++ b/tests/autotests/test_task9.py
@@ -2,14 +2,16 @@
 # You MUST NOT touch anything here except ONE block below
 # You CAN modify this file IF AND ONLY IF you have found a bug and are willing to fix it
 # Otherwise, please report it
-import itertools
-import random
 from copy import deepcopy
-import cfpq_data as cd
-import networkx as nx
 import pytest
-from networkx import MultiDiGraph
-from pyformlang import cfg, rsa
+from grammars_constants import REGEXP_CFG, GRAMMARS, GRAMMARS_DIFFERENT, CFG_EBNF
+from helper import generate_rnd_start_and_final
+from rpq_template_test import (
+    rpq_cfpq_test,
+    different_grammars_test,
+    cfpq_algorithm_test,
+)
+from fixtures import graph

 # Fix import statements in try block to run tests
 try:
@@ -23,153 +25,23 @@
 except ImportError:
     pytestmark = pytest.mark.skip("Task 9 is not ready to test!")

-REGEXP_CFG: dict[str, list[cfg.CFG]] = {
-    "a": [cfg.CFG.from_text("S -> a"), cfg.CFG.from_text("S -> N B\nB -> $\nN -> a")],
-    "a*": [
-        cfg.CFG.from_text("S -> $ | a S"),
-        cfg.CFG.from_text("S -> $ | S S | a"),
-        cfg.CFG.from_text("S -> S a S | $"),
-    ],
-    "a b c": [cfg.CFG.from_text("S -> a b c"), cfg.CFG.from_text("S -> a B\nB -> b c")],
-    "a*b*": [
-        cfg.CFG.from_text("S -> S1 S2\nS2 -> $ | b S2\nS1 -> $ | a S1"),
-        cfg.CFG.from_text("S -> $ | S1 | a S\nS1 -> $ | b S1"),
-    ],
-    "(a b)*": [
-        cfg.CFG.from_text("S -> $ | a b S"),
-        cfg.CFG.from_text("S -> $ | S S1\nS1 -> a b"),
-    ],
-    "a b*c*": [
-        cfg.CFG.from_text("S -> S1 S2 S3\nS1 -> a\nS2 -> $ | S2 b\nS3 -> $ | c S3"),
-        cfg.CFG.from_text("S -> a S2 S3\nS2 -> S2 b | $\nS3 -> c | $ | S3 S3"),
-    ],
-    "(a|b|c|d|e)*": [
-        cfg.CFG.from_text("S -> $ | S1 S\nS1 -> a | b | c | d | e"),
-        cfg.CFG.from_text("S -> $ | a | b | c | d | e | S S"),
-        cfg.CFG.from_text("S -> $ | a S | b S | c S | e S | d S"),
-    ],
-    "((a | b) * c)*(d | e)": [
-        cfg.CFG.from_text(
-            "S -> S1 S2\nS1 -> S1 S1 | $ | S3 c\n S2 -> d | e\n S3 -> b S3 | $ | a S3"
-        ),
-        cfg.CFG.from_text("S -> S1 d | S1 e\nS1 -> S1 S3 c | $\nS3 -> b S3 | $ | a S3"),
-    ],
-}
-
-GRAMMARS = [
-    [
-        cfg.CFG.from_text("S -> $ | a S b | S S"),
-        cfg.CFG.from_text("S -> $ | a S b S"),
-        cfg.CFG.from_text("S -> $ | S a S b"),
-        cfg.CFG.from_text("S -> $ | a S b | S S S"),
-    ],
-    [
-        cfg.CFG.from_text("S -> $ | a S b | c S d | S S"),
-        cfg.CFG.from_text("S -> $ | a S b S | c S d S"),
-        cfg.CFG.from_text("S -> $ | S a S b | S c S d"),
-        cfg.CFG.from_text("S -> $ | a S b | c S d S | S S S"),
-    ],
-    [
-        cfg.CFG.from_text("S -> $ | S1 S S2\nS1 -> a | c\n S2 -> b | d\n S -> S S"),
-        cfg.CFG.from_text("S -> $ | S1 S S2 S\n S1 -> a | c\nS2 -> b | d"),
-        cfg.CFG.from_text("S -> $ | S a S b | S a S d | S c S d | S c S b"),
-        cfg.CFG.from_text("S -> $ | S1 S S2 | S S S\nS1 -> a | c\nS2-> b | d"),
-    ],
-    [
-        cfg.CFG.from_text("S -> S S | Se S1 Se\nSe -> $ | Se e\nS1 -> $ | a S1 b"),
-        cfg.CFG.from_text("S -> S1 | S S | e\nS1 -> $ | a S1 b"),
-        cfg.CFG.from_text("S -> S2 S | $\n S2 -> e | S1\n S1 -> $ | a S1 b"),
-        cfg.CFG.from_text("S -> $ | S1 S | e S\n S1 -> $ | a S1 b"),
-    ],
-    [
-        cfg.CFG.from_text("S -> a S | $"),
-        cfg.CFG.from_text("S -> S1 | a\nS1 -> a S1 | $"),
-    ],
-]
-
-GRAMMARS_DIFFERENT = [
-    cfg.CFG.from_text(
-        "S -> S1 | S2\nS1 -> Sab | S1 c\nSab -> $ | a Sab b\nS2 -> Sbc | a S2\nSbc -> b Sbc c"
-    ),
-    cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S"),
-    cfg.CFG.from_text("S -> $ | a S b | b S a | e S f | S S | c S d | f S c | f S e"),
-]
-
-EBNF_GRAMMARS = [
-    """S -> ( Sab c* ) | ( a* Sbc )
-    Sab -> a ( Sab | $ ) b
-    Sbc -> b ( Sbc | $ ) c""",
-    "S -> a | b | (S ( c | d ) S ) | ( e S f ) | ( g S )",
-    "S -> ( ( a S b ) | ( b S a ) | ( c S d ) | ( d S c ) | ( e S f ) | (f S e) )*",
-]
-
-LABELS = ["a", "b", "c", "d", "e", "f", "g", "h"]
-
-LABEL = "label"
-IS_FINAL = "is_final"
-IS_START = "is_start"
-
-
-def generate_rnd_start_and_final(graph):
-    start_nodes = set(
-        random.choices(
-            list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes))
-        )
-    )
-    final_nodes = set(
-        random.choices(
-            list(graph.nodes().keys()), k=random.randint(1, len(graph.nodes))
-        )
-    )
-
-    for node, data in graph.nodes(data=True):
-        if node in start_nodes:
-            data[IS_START] = True
-        if node in final_nodes:
-            data[IS_FINAL] = True
-    return start_nodes, final_nodes
-
-
-@pytest.fixture(scope="function", params=range(5))
-def graph(request) -> MultiDiGraph:
-    n_of_nodes = random.randint(20, 40)
-    return cd.graphs.labeled_scale_free_graph(n_of_nodes, labels=LABELS)
-

 class TestReachabilityGllAlgorithm:
-    @pytest.mark.parametrize(
-        "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda regexp_cfgs: regexp_cfgs
-    )
+    @pytest.mark.parametrize("regex_str, cfg_list", REGEXP_CFG)
     def test_rpq_cfpq_gll(self, graph, regex_str, cfg_list) -> None:
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-
-        for cf_gram in cfg_list:
-            cfpq: set[tuple[int, int]] = cfpq_with_gll(
-                cfg_to_rsm(cf_gram), deepcopy(graph), start_nodes, final_nodes
-            )
-            rpq: dict[int, set[int]] = reachability_with_constraints(
-                FiniteAutomaton(graph_to_nfa(graph, start_nodes, final_nodes)),
-                FiniteAutomaton(regex_to_dfa(regex_str)),
-            )
-            rpq_set = set()
-            for node_from, nodes_to in rpq.items():
-                for node_to in nodes_to:
-                    rpq_set.add((node_from, node_to))
-            assert cfpq == rpq_set
+        rpq_cfpq_test(graph, regex_str, cfg_list, cfpq_with_gll)

-    @pytest.mark.parametrize("eq_grammars", GRAMMARS, ids=lambda grammars: grammars)
+    @pytest.mark.parametrize("eq_grammars", GRAMMARS)
     def test_different_grammars(self, graph, eq_grammars):
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-        eq_cfpqs = [
-            cfpq_with_gll(
-                cfg_to_rsm(deepcopy(cf_gram)), deepcopy(graph), start_nodes, final_nodes
-            )
-            for cf_gram in eq_grammars
-        ]
-        for a, b in itertools.combinations(eq_cfpqs, 2):
-            assert a == b
+        different_grammars_test(graph, eq_grammars, cfpq_with_gll)

-    @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT, ids=lambda g: g)
+    @pytest.mark.parametrize("cfg_list, ebnf_list", CFG_EBNF)
+    def test_cfpq_gll(self, graph, cfg_list, ebnf_list):
+        cfpq_algorithm_test(
+            graph, ebnf_list, cfg_list, ebnf_to_rsm, cfg_to_rsm, cfpq_with_gll
+        )
+
+    @pytest.mark.parametrize("grammar", GRAMMARS_DIFFERENT)
     def test_hellings_matrix_tensor(self, graph, grammar):
         start_nodes, final_nodes = generate_rnd_start_and_final(graph)
         hellings = cfpq_with_hellings(
@@ -185,40 +57,3 @@ def test_hellings_matrix_tensor(self, graph, grammar):
             cfg_to_rsm(deepcopy(grammar)), deepcopy(graph), start_nodes, final_nodes
         )
         assert (hellings == matrix) and (matrix == tensor) and (tensor == gll)
-
-    @pytest.mark.parametrize(
-        "cfg_grammar, ebnf_grammar",
-        (zip(GRAMMARS_DIFFERENT, EBNF_GRAMMARS)),
-        ids=lambda t: t,
-    )
-    def test_ebnf_cfg(self, graph, cfg_grammar, ebnf_grammar):
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-        cfg_cfpq = cfpq_with_gll(
-            cfg_to_rsm(cfg_grammar), deepcopy(graph), start_nodes, final_nodes
-        )
-        ebnf_cfpq = cfpq_with_gll(
-            ebnf_to_rsm(ebnf_grammar), deepcopy(graph), start_nodes, final_nodes
-        )
-        assert ebnf_cfpq == cfg_cfpq
-
-    @pytest.mark.parametrize(
-        "regex_str, cfg_list", REGEXP_CFG.items(), ids=lambda regexp_cfgs: regexp_cfgs
-    )
-    def test_cfpq_gll(self, graph, regex_str, cfg_list):
-        start_nodes, final_nodes = generate_rnd_start_and_final(graph)
-        eq_cfpqs = [
-            cfpq_with_gll(
-                cfg_to_rsm(deepcopy(cf_gram)), deepcopy(graph), start_nodes, final_nodes
-            )
-            for cf_gram in cfg_list
-        ]
-        eq_cfpqs.append(
-            cfpq_with_gll(
-                ebnf_to_rsm(f"S -> {regex_str}"),
-                deepcopy(graph),
-                start_nodes,
-                final_nodes,
-            )
-        )
-        for a, b in itertools.combinations(eq_cfpqs, 2):
-            assert a == b
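Finally, `grammars_constants.py` has to expose the deleted grammar tables in a shape that `@pytest.mark.parametrize` accepts directly (the tests now pass `REGEXP_CFG` itself where they previously passed `REGEXP_CFG.items()`). A sketch of the expected shapes, with entries abbreviated; the exact CFG/EBNF pairing in `CFG_EBNF` is an assumption:

```python
# Hypothetical sketch of tests/autotests/grammars_constants.py -- NOT part of
# this patch. Same data as the dicts/lists deleted above, reshaped into lists
# of tuples so they can be handed to @pytest.mark.parametrize unchanged.
from pyformlang import cfg

# (regex string, CFGs equivalent to it); one tuple per entry of the old dict.
REGEXP_CFG: list[tuple[str, list[cfg.CFG]]] = [
    ("a", [cfg.CFG.from_text("S -> a"), cfg.CFG.from_text("S -> N B\nB -> $\nN -> a")]),
    ("a*", [cfg.CFG.from_text("S -> $ | a S"), cfg.CFG.from_text("S -> S a S | $")]),
    # ... the remaining entries of the deleted REGEXP_CFG dict ...
]

# Groups of mutually equivalent CFGs; unchanged from the deleted GRAMMARS.
GRAMMARS: list[list[cfg.CFG]] = [
    [cfg.CFG.from_text("S -> $ | a S b | S S"), cfg.CFG.from_text("S -> $ | a S b S")],
    # ... the remaining groups ...
]

# Standalone grammars; unchanged from the deleted GRAMMARS_DIFFERENT.
GRAMMARS_DIFFERENT: list[cfg.CFG] = [
    cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S"),
    # ... the remaining entries ...
]

# (equivalent CFGs, equivalent EBNF texts): presumably pairs the deleted
# GRAMMARS_DIFFERENT entries with the matching deleted EBNF_GRAMMARS strings.
CFG_EBNF: list[tuple[list[cfg.CFG], list[str]]] = [
    (
        [cfg.CFG.from_text("S -> a | b | S c S | S d S | e S f | g S")],
        ["S -> a | b | (S ( c | d ) S ) | ( e S f ) | ( g S )"],
    ),
    # ... the remaining pairs ...
]
```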