diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..0cf68bd05 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,22 @@ +name: Python application + +on: + [ push, pull_request ] + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - name: Set up Git repository + uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r ./requirements.txt + - name: Test with pytest + run: | + python ./scripts/run_tests.py diff --git a/README.md b/README.md index b18fdf77f..ffc866079 100644 --- a/README.md +++ b/README.md @@ -168,28 +168,28 @@ - Синтаксический анализ языков программирования: в компиляторах, интерпертаторах, средах разработки, других инстументах. - Анализ естественных языков. Активность в этой области несколько спала, так как на передний план сейчас вышли различные методы машинного обучения. - Однако и в этой области ведуться работы.Например, [International Conference on Parsing Technologies](http://www.wikicfp.com/cfp/program?id=1853). + Однако и в этой области ведутся работы. Например, [International Conference on Parsing Technologies](http://www.wikicfp.com/cfp/program?id=1853). - Статический анализ кода. - Различные задачи межпроцедурного анализа. Основной подход --- language reachability. Основоположник --- Томас Репс. Примеры работ. - Thomas Reps. 1997. Program analysis via graph reachability. In Proceedings of the 1997 international symposium on Logic programming (ILPS ’97). MIT Press, Cambridge, MA, USA, 5–19. - Qirun Zhang and Zhendong Su. 2017. Context-sensitive data-dependence analysis via linear conjunctive language reachability. In Proceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming Languages (POPL 2017). Association for Computing Machinery, New York, NY, USA, 344–358. 
DOI:https://doi.org/10.1145/3009837.3009848 - Kai Wang, Aftab Hussain, Zhiqiang Zuo, Guoqing Xu, and Ardalan Amiri Sani. 2017. Graspan: A Single-machine Disk-based Graph System for Interprocedural Static Analyses of Large-scale Systems Code. In Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS ’17). Association for Computing Machinery, New York, NY, USA, 389–404. DOI:https://doi.org/10.1145/3037697.3037744 - - Lu Y., Shang L., Xie X., Xue J. (2013) An Incremental Points-to Analysis with CFL-Reachability. In: Jhala R., De Bosschere K. (eds) Compiler Construction. CC 2013. Lecture Notes in Computer Science, vol 7791. Springer, Berlin, Heidelberg + - Lu Y., Shang L., Xie X., Xue J. (2013) An Incremental Points-to Analysis with CFL-Reachability. In: Jhala R., De Bosschere K. (eds) Compiler Construction. CC 2013. Lecture Notes in Computer Science, vol 7791. Springer, Berlin, Heidelberg - Интерливинг (или шафл) языков для верификаци многопоточных программ. - [Approximating the Shuffle of Context-free Languages to Find Bugs in Concurrent Recursive Programs](http://uu.diva-portal.org/smash/get/diva2:442518/FULLTEXT01.pdf) - Flick N.E. (2015) Quotients of Unbounded Parallelism. In: Leucker M., Rueda C., Valencia F. (eds) Theoretical Aspects of Computing - ICTAC 2015. ICTAC 2015. Lecture Notes in Computer Science, vol 9399. Springer, Cham - + - Система типов Java: [Radu Grigore, Java Generics are Turing Complete](https://arxiv.org/abs/1605.05274). - Графовые базы данных. Поиск путей с ограничениями. - Maurizio Nolé and Carlo Sartiani. 2016. Regular Path Queries on Massive Graphs. In Proceedings of the 28th International Conference on Scientific and Statistical Database Management (SSDBM ’16). Association for Computing Machinery, New York, NY, USA, Article 13, 1–12. DOI:https://doi.org/10.1145/2949689.2949711 - Jochem Kuijpers, George Fletcher, Nikolay Yakovets, and Tobias Lindaaker. 
2019. An Experimental Study of Context-Free Path Query Evaluation Methods. In Proceedings of the 31st International Conference on Scientific and Statistical Database Management (SSDBM ’19). Association for Computing Machinery, New York, NY, USA, 121–132. DOI:https://doi.org/10.1145/3335783.3335791 - [Jelle Hellings. Querying for Paths in Graphs using Context-Free Path Queries.](https://arxiv.org/abs/1502.02242) - + - Биоинформатика. В основном это анализ геномных и белковых последовательностей. - [Witold Dyrka, Mateusz Pyzik, Francois Coste, and Hugo Talibart. Estimating probabilistic context-free grammars for proteins using contact map constraints.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6428041/) - [James WJ Anderson, Paula Tataru, Joe Staines, Jotun Hein, and Rune Lyngso. Evolving stochastic context-free grammars for RNA secondary structure prediction.](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3464655/) - - [Ryan Zier-Vogel. Predicting RNA secondary structure using a stochastic conjunctive grammar.](https://www.semanticscholar.org/paper/Predicting-RNA-secondary-structure-using-a-grammar-Zier-Vogel/90bb312cb1a0f61eddb7a8b5b782bb40630894dd). + - [Ryan Zier-Vogel. Predicting RNA secondary structure using a stochastic conjunctive grammar.](https://www.semanticscholar.org/paper/Predicting-RNA-secondary-structure-using-a-grammar-Zier-Vogel/90bb312cb1a0f61eddb7a8b5b782bb40630894dd). - Машинное обучение. - [Matt J. Kusner, Brooks Paige, José Miguel Hernández-Lobato. Grammar Variational Autoencoder](https://arxiv.org/abs/1703.01925). 
Опубликована в 2017 году и уже [больше 950 цитирований.](https://scholar.google.com/scholar?cites=4080460899049502885&as_sdt=2005&sciodt=0,5&hl=ru) @@ -203,17 +203,17 @@ - [HYPEREDGE REPLACEMENT GRAPH GRAMMARS](https://people.cs.umu.se/drewes/biblio/ps-files/hrg.pdf) - [(Re)introducing Regular Graph Languages](https://www.aclweb.org/anthology/W17-3410.pdf) - [Hyperedge Replacement: Grammars and Languages](https://www.springer.com/gp/book/9783540560050) - - $\ldots$ + - $\ldots$ - Теория групп. Как правило, это проблема слов группы или дополнение к ней. - Anisimov, A.V. Group languages. Cybern Syst Anal (1971) 7: 594. - David E. Muller, Paul E. Schupp, Groups, the Theory of ends, and context-free languages, Journal of Computer and System Sciences, Volume 26, Issue 3, 1983, Pages 295-310, ISSN 0022-0000 - HOLT, D., REES, S., ROVER, C., \& THOMAS, R. (2005). GROUPS WITH CONTEXT-FREE CO-WORD PROBLEM. Journal of the London Mathematical Society, 71(3), 643-657. doi:10.1112/S002461070500654X - [Groups with Context-Free Co-Word Problem and Embeddings into Thompson's Group V](https://arxiv.org/abs/1407.7745) - [Kropholler, R. \& Spriano, D. (2019). Closure properties in the class of multiple context-free groups. Groups Complexity Cryptology, 11(1), pp. 1-15. Retrieved 13 Feb. 2020, from doi:10.1515/gcc-2019-2004](https://www.degruyter.com/view/j/gcc.2019.11.issue-1/gcc-2019-2004/gcc-2019-2004.xml) - - [Word problems of groups, formal languages and decidability](https://personalpages.manchester.ac.uk/staff/Mark.Kambites/events/nbsan/nbsan17_thomas.pdf) + - [Word problems of groups, formal languages and decidability](https://personalpages.manchester.ac.uk/staff/Mark.Kambites/events/nbsan/nbsan17_thomas.pdf) - Прочая интересная математика. - Немного топологии в теории формальных языков: [Salvati S. On is an n-MCFL. – 2018.](https://hal.archives-ouvertes.fr/hal-01771670/) - Salvati S. 
from typing import Tuple

import cfpq_data
from cfpq_data.graphs.generators import labeled_two_cycles_graph
from networkx.drawing.nx_pydot import write_dot


def getGraphInfoByName(graphName: str) -> Tuple[int, int, set]:
    """Download a named graph via ``cfpq_data`` and summarise it.

    Args:
        graphName: Name handed to ``cfpq_data.download``.

    Returns:
        A ``(node_count, edge_count, labels)`` tuple, where ``labels`` is the
        set of ``"label"`` attribute values found on the graph's edges.

    Raises:
        KeyError: If some edge carries no ``"label"`` attribute.
    """
    graph = cfpq_data.graph_from_csv(cfpq_data.download(graphName))
    # Each edge arrives as (source, target, attribute_dict) with data=True.
    labels = {attrs["label"] for _, _, attrs in graph.edges(data=True)}
    return graph.number_of_nodes(), graph.number_of_edges(), labels


def createBiSycleGraph(
    cSize1: int, cSize2: int, labels: Tuple[str, str], path: str
) -> None:
    """Build a labeled two-cycles graph and write it to *path* in DOT format.

    Args:
        cSize1: Forwarded as ``n`` to ``labeled_two_cycles_graph``.
        cSize2: Forwarded as ``m`` to ``labeled_two_cycles_graph``.
        labels: Forwarded unchanged as ``labels``. Pass an ordered pair (one
            label per cycle); an unordered ``set`` cannot express which label
            belongs to which cycle.
            NOTE(review): pair shape is taken from how callers use this
            function -- confirm against the cfpq_data documentation.
        path: Destination file handed to ``write_dot``.
    """
    graph = labeled_two_cycles_graph(n=cSize1, m=cSize2, labels=labels)
    # No stdout output here: echoing the path was leftover debug noise.
    write_dot(graph, path)
def test_create_bisycle_graph():
    """Check the DOT text written by ``createBiSycleGraph(3, 5, ("a", "b"), ...)``.

    The expected file lists nodes 1, 2, 3, 0 and then 4..8, followed by the
    "a"-labeled cycle 1 -> 2 -> 3 -> 0 -> 1 and the "b"-labeled cycle
    0 -> 4 -> 5 -> 6 -> 7 -> 8 -> 0, and ends with a trailing newline.
    """
    # Function-scope imports keep this test self-contained.
    import os
    from tempfile import TemporaryDirectory

    first_cycle = [(1, 2), (2, 3), (3, 0), (0, 1)]
    second_cycle = [(0, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 0)]

    expected = ["digraph {"]
    expected += [f"{node};" for node in (1, 2, 3, 0)]
    expected += [f"{node};" for node in range(4, 9)]
    expected += [
        f"{src} -> {dst} [key=0, label=a];" for src, dst in first_cycle
    ]
    expected += [
        f"{src} -> {dst} [key=0, label=b];" for src, dst in second_cycle
    ]
    # Closing brace plus an empty entry so the joined text ends with "\n".
    expected += ["}", ""]

    # Write into a fresh directory and read the file back afterwards.
    # Reading through a still-open NamedTemporaryFile while another writer
    # opens the same path by name is platform-dependent (fails on Windows).
    with TemporaryDirectory() as tmp_dir:
        dot_path = os.path.join(tmp_dir, "two_cycles.dot")
        createBiSycleGraph(3, 5, ("a", "b"), dot_path)
        with open(dot_path) as dot_file:
            result = dot_file.read()

    assert result == "\n".join(expected)