From 9e64f77aa35867eb97e3c18d83e2f6aab5e45747 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?=
Date: Tue, 12 Sep 2023 21:35:56 +0200
Subject: [PATCH] test: fetch code blocks in docstrings via markdown extension

---
 .github/workflows/documentation.yml |   2 +-
 .github/workflows/release.yml       |   2 +-
 .github/workflows/tests.yml         |   2 +-
 Makefile                            |   4 +-
 contributing.md                     |   4 +-
 pyproject.toml                      |  16 +--
 tests/extract_docs_code.py          | 156 ++++++++++++++++++++++++++++
 tests/test_docs.py                  |  38 ++++---
 8 files changed, 196 insertions(+), 28 deletions(-)
 create mode 100644 tests/extract_docs_code.py

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index ca281dbd5..13108923b 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install '.[docs]'
+          pip install '.[dev]'
       - name: Set up Git
         run: |
           git config user.name ${{ github.actor }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 540fd7fc8..6f8dba5fb 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -72,7 +72,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install '.[docs]'
+          pip install '.[dev]'
       - name: Set up Git
         run: |
           git config user.name ${{ github.actor }}
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 01a2133be..d1f23bc45 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -79,7 +79,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install '.[docs]'
+          pip install '.[dev]'
       - name: Build documentation
         run: |
           mkdocs build --clean
diff --git a/Makefile b/Makefile
index 6c1b824f0..11944d17c 100644
--- a/Makefile
+++ b/Makefile
@@ -14,14 +14,14 @@ create-env: .venv
 
 install : .venv
 	. .venv/bin/activate
-	pip install -r '.[dev,docs,setup]'.txt
+	pip install -r '.[dev,setup]'.txt
 	python scripts/conjugate_verbs.py
 	pip install -e .
 	pre-commit install
 
 documentation: .venv
 	. .venv/bin/activate
-	pip install -e '.[docs]'
+	pip install -e '.[dev]'
 	mkdocs serve
 
 test: .venv
diff --git a/contributing.md b/contributing.md
index 2ca7634f2..aba1d1f0d 100644
--- a/contributing.md
+++ b/contributing.md
@@ -24,7 +24,7 @@ $ python -m venv venv
 $ source venv/bin/activate
 
 # Install the package with common, dev, setup dependencies in editable mode
-$ pip install -e '.[dev,docs,setup]'
+$ pip install -e '.[dev,setup]'
 # And build resources
 $ python scripts/conjugate_verbs.py
 ```
@@ -113,7 +113,7 @@ We use `MkDocs` for EDS-NLP's documentation. You can checkout the changes you ma
 
 ```console
 # Install the requirements
-$ pip install -e '.[docs]'
+$ pip install -e '.[dev]'
 ---> 100%
 color:green Installation successful
 
diff --git a/pyproject.toml b/pyproject.toml
index bdd337357..2ad72e398 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,12 +41,8 @@ dev = [
     "pytest-cov>=3.0.0,<4.0.0",
     "pytest-html>=3.1.1,<4.0.0",
     "torch>=1.0.0",
-]
-setup = [
-    "mlconjug3<3.9.0",
-    "typer"
-]
-docs = [
+
+    # docs
     "mike~=1.1.2",
     "mkdocs-charts-plugin==0.0.8",
     "mkdocs-img2fig-plugin==0.9.3",
@@ -59,6 +55,10 @@ docs = [
     "pybtex~=0.24.0",
     "pathspec>=0.11.1",  # required by vendored mkdocs-autorefs PR
 ]
+setup = [
+    "mlconjug3<3.9.0",
+    "typer"
+]
 
 [project.urls]
 "Source Code" = "https://github.com/aphp/edsnlp"
@@ -151,6 +151,10 @@ where = ["."]
 [project.entry-points."spacy_languages"]
 "eds" = "edsnlp.language:EDSLanguage"
 
+[project.entry-points."mkdocs.plugins"]
+"bibtex" = "docs.scripts.bibtex:BibTexPlugin"
+"autorefs" = "docs.scripts.autorefs.plugin:AutorefsPlugin"
+
 [build-system]
 requires = [
     "setuptools",
diff --git a/tests/extract_docs_code.py b/tests/extract_docs_code.py
new file mode 100644
index 000000000..1faec44ef
--- /dev/null
+++ b/tests/extract_docs_code.py
@@ -0,0 +1,156 @@
+import re
+import shutil
+import tempfile
+from textwrap import dedent
+from typing import Tuple
+
+from markdown.extensions import Extension
+from markdown.extensions.attr_list import get_attrs
+from markdown.extensions.codehilite import parse_hl_lines
+from markdown.extensions.fenced_code import FencedBlockPreprocessor
+from mkdocs.commands.build import build
+from mkdocs.config import load_config
+from mkdocs.config.config_options import Type as MkType
+from mkdocs.config.defaults import MkDocsConfig
+from mkdocs.plugins import BasePlugin
+from mkdocstrings.extension import AutoDocProcessor
+from mkdocstrings.plugin import MkdocstringsPlugin
+
+BRACKET_RE = re.compile(r"\[([^\[]+)\]")
+CITE_RE = re.compile(r"@([\w_:-]+)")
+DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")
+INDENT_RE = re.compile(r"\A\t| {4}(.*)")
+
+CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"
+
+
+class PyCodePreprocessor(FencedBlockPreprocessor):
+    """Collect fenced Python code blocks, skipping those marked `no-check`"""
+
+    FENCED_BLOCK_RE = re.compile(
+        dedent(
+            r"""
+            (?P<fence>^[ ]*(?:~{3,}|`{3,}))[ ]*                          # opening fence
+            ((\{(?P<attrs>[^\}\n]*)\})|                                  # (optional {attrs} or
+            (\.?(?P<lang>[\w#.+-]*)[ ]*)?                                # optional (.)lang
+            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?)     # optional hl_lines)
+            \n                                                           # newline (end of opening fence)
+            (?P<code>.*?)(?<=\n)                                         # the code block
+            (?P=fence)[ ]*$                                              # closing fence
+        """  # noqa: E501
+        ),
+        re.MULTILINE | re.DOTALL | re.VERBOSE,
+    )
+
+    def __init__(self, md, code_blocks):
+        super().__init__(md, {})
+        self.code_blocks = code_blocks
+
+    def run(self, lines):
+        text = "\n".join(lines)
+        if 'nlp.add_pipe(f"eds.aids")' in text:
+            print("TEXT", text)
+        while True:
+            # ---- https://github.com/Python-Markdown/markdown/blob/5a2fee/markdown/extensions/fenced_code.py#L84C9-L98  # noqa: E501
+            m = self.FENCED_BLOCK_RE.search(text)
+            if 'nlp.add_pipe(f"eds.aids")' in text:
+                print("CODE ==>", m.group("code") if m else None)
+            if m:
+                lang, id, classes, config = None, "", [], {}
+                if m.group("attrs"):
+                    id, classes, config = self.handle_attrs(get_attrs(m.group("attrs")))
+                    if len(classes):
+                        lang = classes.pop(0)
+                else:
+                    if m.group("lang"):
+                        lang = m.group("lang")
+                    if m.group("hl_lines"):
+                        # Support `hl_lines` outside of `attrs` for
+                        # backward-compatibility
+                        config["hl_lines"] = parse_hl_lines(m.group("hl_lines"))
+                # ----
+                code = m.group("code")
+
+                if lang == "python" and "no-check" not in classes:
+                    self.code_blocks.append(dedent(code))
+            else:
+                break
+            text = text[m.end() :]
+
+        return lines
+
+
+context_citations = None
+
+
+class PyCodeExtension(Extension):
+    def __init__(self, code_blocks):
+        super(PyCodeExtension, self).__init__()
+        self.code_blocks = code_blocks
+
+    def extendMarkdown(self, md):
+        self.md = md
+        md.registerExtension(self)
+        md.preprocessors.register(
+            PyCodePreprocessor(md, self.code_blocks), "fenced_code", 31
+        )
+        for ext in md.registeredExtensions:
+            if isinstance(ext, AutoDocProcessor):
+                ext._config["mdx"].append(self)
+
+
+def makeExtension(*args, **kwargs):
+    return PyCodeExtension(*args, **kwargs)
+
+
+class PyCodeExtractorPlugin(BasePlugin):
+    config_scheme: Tuple[Tuple[str, MkType]] = (
+        # ("bibtex_file", MkType(str)),  # type: ignore[assignment]
+        # ("order", MkType(str, default="unsorted")),  # type: ignore[assignment]
+    )
+
+    def __init__(self, global_config):
+        self.global_config = global_config
+        self.page_code_blocks = []
+        self.docs_code_blocks = []
+
+    def on_config(self, config: MkDocsConfig):
+        self.ext = PyCodeExtension(self.page_code_blocks)
+        # After pymdownx.highlight, because of weird registering deleting the first
+        # extension
+        config["markdown_extensions"].append(self.ext)
+        config["markdown_extensions"].remove("pymdownx.highlight")
+        config["markdown_extensions"].remove("fenced_code")
+
+    def on_pre_build(self, *, config: MkDocsConfig):
+        mkdocstrings_plugin: MkdocstringsPlugin = config.plugins["mkdocstrings"]
+        mkdocstrings_plugin.get_handler("python")
+
+    def on_page_content(self, html, page, config, files):
+        if len(self.page_code_blocks):
+            self.docs_code_blocks.append((page.url, "\n".join(self.page_code_blocks)))
+        self.page_code_blocks.clear()
+        return html
+
+
+def extract_docs_code():
+    config = load_config()
+
+    temp_dir = tempfile.mkdtemp()
+    try:
+        config["site_dir"] = temp_dir
+
+        # plug the pycode extractor plugin
+        plugin = PyCodeExtractorPlugin(config)
+        config.plugins["pycode_extractor"] = plugin
+
+        config["plugins"].run_event("startup", command="build", dirty=False)
+        try:
+            build(config)
+        finally:
+            config["plugins"].run_event("shutdown")
+
+    finally:
+        shutil.rmtree(temp_dir, ignore_errors=True)
+
+    return plugin.docs_code_blocks
diff --git a/tests/test_docs.py b/tests/test_docs.py
index f62bafcb5..6d239aaa6 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -1,23 +1,31 @@
-from itertools import chain
-from pathlib import Path
-
 import pytest
+from extract_docs_code import extract_docs_code
+
+url_to_code = dict(extract_docs_code())
 
-from edsnlp.utils.blocs import check_md_file
 
-# @pytest.fixture(autouse=True, scope="module")
-# def brat_folder():
-#     yield
-#     shutil.rmtree("path/to/brat")
+def printer(code: str) -> None:
+    """
+    Prints a code bloc with lines for easier debugging.
 
+    Parameters
+    ----------
+    code : str
+        Code bloc.
+    """
+    lines = []
+    for i, line in enumerate(code.split("\n")):
+        lines.append(f"{i + 1:03} {line}")
 
-files = chain(
-    Path("./").glob("*.md"),
-    Path("docs").glob("**/*.md"),
-)
+    print("\n".join(lines))
 
 
 # Note the use of `str`, makes for pretty output
-@pytest.mark.parametrize("path", files, ids=str)
-def test_code_blocks(path):
-    check_md_file(path=path, memory=True)
+@pytest.mark.parametrize("url", sorted(url_to_code.keys()), ids=str)
+def test_code_blocks(url):
+    raw = url_to_code[url]
+    try:
+        exec(raw, {"__MODULE__": "__main__"})
+    except Exception:
+        printer(raw)
+        raise