From 52ad67755838aeeae3a7f9a90bac2d17fc46cffd Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 14 Feb 2024 20:47:42 +0300 Subject: [PATCH 1/4] [HW-#1.0] Add initial solution for 1st home task --- README.md | 16 ++++++++-------- project/hw1/task.py | 16 ++++++++++++++++ tests/test_hw1_task.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 project/hw1/task.py create mode 100644 tests/test_hw1_task.py diff --git a/README.md b/README.md index b18fdf77f..ffc866079 100644 --- a/README.md +++ b/README.md @@ -168,28 +168,28 @@ - Синтаксический анализ языков программирования: в компиляторах, интерпертаторах, средах разработки, других инстументах. - Анализ естественных языков. Активность в этой области несколько спала, так как на передний план сейчас вышли различные методы машинного обучения. - Однако и в этой области ведуться работы.Например, [International Conference on Parsing Technologies](http://www.wikicfp.com/cfp/program?id=1853). + Однако и в этой области ведуться работы.Например, [International Conference on Parsing Technologies](http://www.wikicfp.com/cfp/program?id=1853). - Статический анализ кода. - Различные задачи межпроцедурного анализа. Основной подход --- language reachability. Основоположник --- Томас Репс. Примеры работ. - Thomas Reps. 1997. Program analysis via graph reachability. In Proceedings of the 1997 international symposium on Logic programming (ILPS ’97). MIT Press, Cambridge, MA, USA, 5–19. - Qirun Zhang and Zhendong Su. 2017. Context-sensitive data-dependence analysis via linear conjunctive language reachability. In Proceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming Languages (POPL 2017). Association for Computing Machinery, New York, NY, USA, 344–358. DOI:https://doi.org/10.1145/3009837.3009848 - Kai Wang, Aftab Hussain, Zhiqiang Zuo, Guoqing Xu, and Ardalan Amiri Sani. 2017. Graspan: A Single-machine Disk-based Graph System for Interprocedural Static Analyses of Large-scale Systems Code. In Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS ’17). Association for Computing Machinery, New York, NY, USA, 389–404. DOI:https://doi.org/10.1145/3037697.3037744 - - Lu Y., Shang L., Xie X., Xue J. (2013) An Incremental Points-to Analysis with CFL-Reachability. In: Jhala R., De Bosschere K. (eds) Compiler Construction. CC 2013. Lecture Notes in Computer Science, vol 7791. Springer, Berlin, Heidelberg + - Lu Y., Shang L., Xie X., Xue J. (2013) An Incremental Points-to Analysis with CFL-Reachability. In: Jhala R., De Bosschere K. (eds) Compiler Construction. CC 2013. Lecture Notes in Computer Science, vol 7791. Springer, Berlin, Heidelberg - Интерливинг (или шафл) языков для верификаци многопоточных программ. - [Approximating the Shuffle of Context-free Languages to Find Bugs in Concurrent Recursive Programs](http://uu.diva-portal.org/smash/get/diva2:442518/FULLTEXT01.pdf) - Flick N.E. (2015) Quotients of Unbounded Parallelism. In: Leucker M., Rueda C., Valencia F. (eds) Theoretical Aspects of Computing - ICTAC 2015. ICTAC 2015. Lecture Notes in Computer Science, vol 9399. Springer, Cham - + - Система типов Java: [Radu Grigore, Java Generics are Turing Complete](https://arxiv.org/abs/1605.05274). - Графовые базы данных. Поиск путей с ограничениями. - Maurizio Nolé and Carlo Sartiani. 2016. Regular Path Queries on Massive Graphs. In Proceedings of the 28th International Conference on Scientific and Statistical Database Management (SSDBM ’16). Association for Computing Machinery, New York, NY, USA, Article 13, 1–12. DOI:https://doi.org/10.1145/2949689.2949711 - Jochem Kuijpers, George Fletcher, Nikolay Yakovets, and Tobias Lindaaker. 2019. An Experimental Study of Context-Free Path Query Evaluation Methods. In Proceedings of the 31st International Conference on Scientific and Statistical Database Management (SSDBM ’19). Association for Computing Machinery, New York, NY, USA, 121–132. DOI:https://doi.org/10.1145/3335783.3335791 - [Jelle Hellings. Querying for Paths in Graphs using Context-Free Path Queries.](https://arxiv.org/abs/1502.02242) - + - Биоинформатика. В основном это анализ геномных и белковых последовательностей. - [Witold Dyrka, Mateusz Pyzik, Francois Coste, and Hugo Talibart. Estimating probabilistic context-free grammars for proteins using contact map constraints.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6428041/) - [James WJ Anderson, Paula Tataru, Joe Staines, Jotun Hein, and Rune Lyngso. Evolving stochastic context-free grammars for RNA secondary structure prediction.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3464655/) - - [Ryan Zier-Vogel. Predicting RNA secondary structure using a stochastic conjunctive grammar.](https://www.semanticscholar.org/paper/Predicting-RNA-secondary-structure-using-a-grammar-Zier-Vogel/90bb312cb1a0f61eddb7a8b5b782bb40630894dd). + - [Ryan Zier-Vogel. Predicting RNA secondary structure using a stochastic conjunctive grammar.](https://www.semanticscholar.org/paper/Predicting-RNA-secondary-structure-using-a-grammar-Zier-Vogel/90bb312cb1a0f61eddb7a8b5b782bb40630894dd). - Машинное обучение. - [Matt J. Kusner, Brooks Paige, José Miguel Hernández-Lobato. Grammar Variational Autoencoder](https://arxiv.org/abs/1703.01925). Опубликована в 2017 году и уже [больше 950 цитирований.](https://scholar.google.com/scholar?cites=4080460899049502885&as_sdt=2005&sciodt=0,5&hl=ru) @@ -203,17 +203,17 @@ - [HYPEREDGE REPLACEMENT GRAPH GRAMMARS](https://people.cs.umu.se/drewes/biblio/ps-files/hrg.pdf) - [(Re)introducing Regular Graph Languages](https://www.aclweb.org/anthology/W17-3410.pdf) - [Hyperedge Replacement: Grammars and Languages](https://www.springer.com/gp/book/9783540560050) - - $\ldots$ + - $\ldots$ - Теория групп. Как правило, это проблема слов группы или дополнение к ней. - Anisimov, A.V. Group languages. Cybern Syst Anal (1971) 7: 594. - David E. Muller, Paul E. Schupp, Groups, the Theory of ends, and context-free languages, Journal of Computer and System Sciences, Volume 26, Issue 3, 1983, Pages 295-310, ISSN 0022-0000 - HOLT, D., REES, S., ROVER, C., \& THOMAS, R. (2005). GROUPS WITH CONTEXT-FREE CO-WORD PROBLEM. Journal of the London Mathematical Society, 71(3), 643-657. doi:10.1112/S002461070500654X - [Groups with Context-Free Co-Word Problem and Embeddings into Thompson's Group V](https://arxiv.org/abs/1407.7745) - [Kropholler, R. \& Spriano, D. (2019). Closure properties in the class of multiple context-free groups. Groups Complexity Cryptology, 11(1), pp. 1-15. Retrieved 13 Feb. 2020, from doi:10.1515/gcc-2019-2004](https://www.degruyter.com/view/j/gcc.2019.11.issue-1/gcc-2019-2004/gcc-2019-2004.xml) - - [Word problems of groups, formal languages and decidability](https://personalpages.manchester.ac.uk/staff/Mark.Kambites/events/nbsan/nbsan17_thomas.pdf) + - [Word problems of groups, formal languages and decidability](https://personalpages.manchester.ac.uk/staff/Mark.Kambites/events/nbsan/nbsan17_thomas.pdf) - Прочая интересная математика. - Немного топологии в теории формальных языков: [Salvati S. On is an n-MCFL. – 2018.](https://hal.archives-ouvertes.fr/hal-01771670/) - Salvati S. MIX is a 2-MCFL and the word problem in Z2 is captured by the IO and the OI hierarchies //Journal of Computer and System Sciences. -- 2015. -- Т. 81. -- \textnumero. 7. -- С. 1252-1277. - О том, как задачи из теории графов связаны с теорией формальных языков: Abboud, Amir \& Backurs, Arturs \& Williams, Virginia. (2015). If the Current Clique Algorithms are Optimal, So is Valiant's Parser. 98-117. 10.1109/FOCS.2015.16. - - [A context-free grammar for the Ramanujan-Shor polynomials](https://www.sciencedirect.com/science/article/abs/pii/S0196885819300739) + - [A context-free grammar for the Ramanujan-Shor polynomials](https://www.sciencedirect.com/science/article/abs/pii/S0196885819300739) diff --git a/project/hw1/task.py b/project/hw1/task.py new file mode 100644 index 000000000..b35af446f --- /dev/null +++ b/project/hw1/task.py @@ -0,0 +1,16 @@ +import cfpq_data +from cfpq_data.graphs.generators import labeled_two_cycles_graph +from networkx.drawing.nx_pydot import write_dot + + +def getGraphInfoByName(graphName: str): + graph_csv = cfpq_data.download(graphName) + graph = cfpq_data.graph_from_csv(graph_csv) + return graph.number_of_nodes(),\ + graph.number_of_edges(),\ + set(map(lambda x: x[2]['label'], graph.edges(data=True))) + +def createBiSycleGraph(cSize1: int, cSize2: int, labels: set[str], path: str): + graph = labeled_two_cycles_graph(n=cSize1, m=cSize2, labels=labels) + print(path) + write_dot(graph, path) \ No newline at end of file diff --git a/tests/test_hw1_task.py b/tests/test_hw1_task.py new file mode 100644 index 000000000..e12ca46ba --- /dev/null +++ b/tests/test_hw1_task.py @@ -0,0 +1,38 @@ +import pytest + +from tempfile import NamedTemporaryFile +from project.hw1.task import ( + createBiSycleGraph, + getGraphInfoByName +) + +def test_get_graph_info_by_bzip_name(): + nodes, edge, labels = getGraphInfoByName("bzip") + assert nodes == 632 + assert edge == 556 + assert labels == {'a', 'd'} + +def test_get_graph_info_by_biomedical_name(): + nodes, edge, labels = getGraphInfoByName("biomedical") + assert nodes == 341 + assert edge == 459 + assert labels == {'type', 'label', 'subClassOf', 'comment', 'versionInfo', + 'title', 'language', 'publisher', 'description', 'creator'} + +def test_create_bisycle_graph(): + target = ["digraph {"] + for i in [1, 2, 3, 0]: + target.append(f"{i};") + for j in range(3, 8): + target.append(f"{(j + 1)};") + for i in [(1, 2), (2, 3), (3, 0), (0, 1)]: + target.append(f"{i[0]} -> {i[1]} [key=0, label=a];") + for j in [(0, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 0)]: + target.append(f"{j[0]} -> {j[1]} [key=0, label=b];") + target.append("}") + target.append("") + + with NamedTemporaryFile("w+") as tmp: + createBiSycleGraph(3, 5, ('a', 'b'), tmp.name) + result = tmp.read() + assert result == "\n".join(target) \ No newline at end of file From f68f8ccab9338a3241020eb75af8fb401df1a3ed Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 14 Feb 2024 20:58:57 +0300 Subject: [PATCH 2/4] [HW-#1.1] Add test workflow --- .github/workflows/test.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..e33ac8b31 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,20 @@ +name: Python application + +on: + [ push, pull_request ] + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + - name: Test with pytest + run: | + python ./scripts/run_tests.py \ No newline at end of file From ede8c9bc5aae72448661651208825859913c7433 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 14 Feb 2024 21:39:39 +0300 Subject: [PATCH 3/4] [HW-#1.2] Style fixes and python version decrement --- .github/workflows/test.yml | 12 +++++++----- project/hw1/task.py | 11 +++++++---- tests/test_hw1_task.py | 30 ++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e33ac8b31..0cf68bd05 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,14 +7,16 @@ jobs: tests: runs-on: ubuntu-latest steps: - - name: Set up Python 3.10 - uses: actions/setup-python@v3 + - name: Set up Git repository + uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 with: - python-version: "3.10" + python-version: "3.8" - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install -r requirements.txt + python -m pip install -r ./requirements.txt - name: Test with pytest run: | - python ./scripts/run_tests.py \ No newline at end of file + python ./scripts/run_tests.py diff --git a/project/hw1/task.py b/project/hw1/task.py index b35af446f..21cf3c058 100644 --- a/project/hw1/task.py +++ b/project/hw1/task.py @@ -6,11 +6,14 @@ def getGraphInfoByName(graphName: str): graph_csv = cfpq_data.download(graphName) graph = cfpq_data.graph_from_csv(graph_csv) - return graph.number_of_nodes(),\ - graph.number_of_edges(),\ - set(map(lambda x: x[2]['label'], graph.edges(data=True))) + return ( + graph.number_of_nodes(), + graph.number_of_edges(), + set(map(lambda x: x[2]["label"], graph.edges(data=True))), + ) + def createBiSycleGraph(cSize1: int, cSize2: int, labels: set[str], path: str): graph = labeled_two_cycles_graph(n=cSize1, m=cSize2, labels=labels) print(path) - write_dot(graph, path) \ No newline at end of file + write_dot(graph, path) diff --git a/tests/test_hw1_task.py b/tests/test_hw1_task.py index e12ca46ba..63f2c8e56 100644 --- a/tests/test_hw1_task.py +++ b/tests/test_hw1_task.py @@ -1,24 +1,34 @@ import pytest from tempfile import NamedTemporaryFile -from project.hw1.task import ( - createBiSycleGraph, - getGraphInfoByName -) +from project.hw1.task import createBiSycleGraph, getGraphInfoByName + def test_get_graph_info_by_bzip_name(): nodes, edge, labels = getGraphInfoByName("bzip") assert nodes == 632 assert edge == 556 - assert labels == {'a', 'd'} + assert labels == {"a", "d"} + def test_get_graph_info_by_biomedical_name(): nodes, edge, labels = getGraphInfoByName("biomedical") assert nodes == 341 assert edge == 459 - assert labels == {'type', 'label', 'subClassOf', 'comment', 'versionInfo', - 'title', 'language', 'publisher', 'description', 'creator'} - + assert labels == { + "type", + "label", + "subClassOf", + "comment", + "versionInfo", + "title", + "language", + "publisher", + "description", + "creator", + } + + def test_create_bisycle_graph(): target = ["digraph {"] for i in [1, 2, 3, 0]: @@ -33,6 +43,6 @@ def test_create_bisycle_graph(): target.append("") with NamedTemporaryFile("w+") as tmp: - createBiSycleGraph(3, 5, ('a', 'b'), tmp.name) + createBiSycleGraph(3, 5, ("a", "b"), tmp.name) result = tmp.read() - assert result == "\n".join(target) \ No newline at end of file + assert result == "\n".join(target) From e234e49ba12b1de40806493af55efff7a3852bf5 Mon Sep 17 00:00:00 2001 From: Skom Erik Date: Wed, 14 Feb 2024 22:02:02 +0300 Subject: [PATCH 4/4] [HW-#1.3] Remove return type annotations --- project/hw1/task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/hw1/task.py b/project/hw1/task.py index 21cf3c058..00879dfb5 100644 --- a/project/hw1/task.py +++ b/project/hw1/task.py @@ -13,7 +13,7 @@ def getGraphInfoByName(graphName: str): ) -def createBiSycleGraph(cSize1: int, cSize2: int, labels: set[str], path: str): +def createBiSycleGraph(cSize1: int, cSize2: int, labels: set, path: str): graph = labeled_two_cycles_graph(n=cSize1, m=cSize2, labels=labels) print(path) write_dot(graph, path)