Skip to content

Commit

Permalink
test: fetch code blocks in docstrings via markdown extension
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Sep 13, 2023
1 parent a77d3d1 commit 9e64f77
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install '.[docs]'
pip install '.[dev]'
- name: Set up Git
run: |
git config user.name ${{ github.actor }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install '.[docs]'
pip install '.[dev]'
- name: Set up Git
run: |
git config user.name ${{ github.actor }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install '.[docs]'
pip install '.[dev]'
- name: Build documentation
run: |
mkdocs build --clean
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ create-env: .venv

install : .venv
. .venv/bin/activate
pip install -r '.[dev,docs,setup]'.txt
pip install -r '.[dev,setup]'.txt
python scripts/conjugate_verbs.py
pip install -e .
pre-commit install

documentation: .venv
. .venv/bin/activate
pip install -e '.[docs]'
pip install -e '.[dev]'
mkdocs serve

test: .venv
Expand Down
4 changes: 2 additions & 2 deletions contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ $ python -m venv venv
$ source venv/bin/activate

# Install the package with common, dev, setup dependencies in editable mode
$ pip install -e '.[dev,docs,setup]'
$ pip install -e '.[dev,setup]'
# And build resources
$ python scripts/conjugate_verbs.py
```
Expand Down Expand Up @@ -113,7 +113,7 @@ We use `MkDocs` for EDS-NLP's documentation. You can checkout the changes you ma

```console
# Install the requirements
$ pip install -e '.[docs]'
$ pip install -e '.[dev]'
---> 100%
color:green Installation successful

Expand Down
16 changes: 10 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,8 @@ dev = [
"pytest-cov>=3.0.0,<4.0.0",
"pytest-html>=3.1.1,<4.0.0",
"torch>=1.0.0",
]
setup = [
"mlconjug3<3.9.0",
"typer"
]
docs = [

# docs
"mike~=1.1.2",
"mkdocs-charts-plugin==0.0.8",
"mkdocs-img2fig-plugin==0.9.3",
Expand All @@ -59,6 +55,10 @@ docs = [
"pybtex~=0.24.0",
"pathspec>=0.11.1", # required by vendored mkdocs-autorefs PR
]
setup = [
"mlconjug3<3.9.0",
"typer"
]

[project.urls]
"Source Code" = "https://github.com/aphp/edsnlp"
Expand Down Expand Up @@ -151,6 +151,10 @@ where = ["."]
[project.entry-points."spacy_languages"]
"eds" = "edsnlp.language:EDSLanguage"

[project.entry-points."mkdocs.plugins"]
"bibtex" = "docs.scripts.bibtex:BibTexPlugin"
"autorefs" = "docs.scripts.autorefs.plugin:AutorefsPlugin"

[build-system]
requires = [
"setuptools",
Expand Down
156 changes: 156 additions & 0 deletions tests/extract_docs_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import re
import shutil
import tempfile
from textwrap import dedent
from typing import Tuple

from markdown.extensions import Extension
from markdown.extensions.attr_list import get_attrs
from markdown.extensions.codehilite import parse_hl_lines
from markdown.extensions.fenced_code import FencedBlockPreprocessor
from mkdocs.commands.build import build
from mkdocs.config import load_config
from mkdocs.config.config_options import Type as MkType
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin
from mkdocstrings.extension import AutoDocProcessor
from mkdocstrings.plugin import MkdocstringsPlugin

# NOTE(review): none of the patterns below is referenced by the code visible in
# this module; they look like leftovers from a citation-handling plugin —
# confirm they are not imported elsewhere before removing them.
BRACKET_RE = re.compile(r"\[([^\[]+)\]")
CITE_RE = re.compile(r"@([\w_:-]+)")
DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")
INDENT_RE = re.compile(r"\A\t| {4}(.*)")

CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"


class PyCodePreprocessor(FencedBlockPreprocessor):
    """Collect the Python fenced code blocks found in a Markdown document.

    Every fenced block whose language is ``python`` and that does not carry
    the ``no-check`` class has its dedented body appended to ``code_blocks``.
    The Markdown lines themselves are passed through unchanged.
    """

    FENCED_BLOCK_RE = re.compile(
        dedent(
            r"""
            (?P<fence>^[ ]*(?:~{3,}|`{3,}))[ ]*                      # opening fence
            ((\{(?P<attrs>[^\}\n]*)\})|                              # (optional {attrs} or
            (\.?(?P<lang>[\w#.+-]*)[ ]*)?                            # optional (.)lang
            (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
            \n                                                       # newline (end of opening fence)
            (?P<code>.*?)(?<=\n)                                     # the code block
            (?P=fence)[ ]*$                                          # closing fence
            """  # noqa: E501
        ),
        re.MULTILINE | re.DOTALL | re.VERBOSE,
    )

    def __init__(self, md, code_blocks):
        """
        Parameters
        ----------
        md
            Markdown instance, forwarded to ``FencedBlockPreprocessor``.
        code_blocks : list
            List that receives the source of each collected Python block.
            It is mutated in place and shared with the caller.
        """
        super().__init__(md, {})
        self.code_blocks = code_blocks

    def run(self, lines):
        """Record the Python code blocks of a document.

        Parameters
        ----------
        lines : list of str
            Lines of the Markdown document.

        Returns
        -------
        list of str
            The same ``lines`` object, unmodified.
        """
        text = "\n".join(lines)
        # A match always ends at the end of a line, so scanning on from that
        # position finds the same blocks as re-searching the remaining text.
        for m in self.FENCED_BLOCK_RE.finditer(text):
            # ---- https://github.com/Python-Markdown/markdown/blob/5a2fee/markdown/extensions/fenced_code.py#L84C9-L98  # noqa: E501
            # Only `lang` and `classes` are used below; the rest mirrors the
            # upstream parsing of the opening fence.
            lang, block_id, classes, config = None, "", [], {}
            if m.group("attrs"):
                block_id, classes, config = self.handle_attrs(
                    get_attrs(m.group("attrs"))
                )
                if classes:
                    # With `{attrs}`, the first class is the language
                    lang = classes.pop(0)
            else:
                if m.group("lang"):
                    lang = m.group("lang")
                if m.group("hl_lines"):
                    # Support `hl_lines` outside of `attrs` for
                    # backward-compatibility
                    config["hl_lines"] = parse_hl_lines(m.group("hl_lines"))
            # ----

            if lang == "python" and "no-check" not in classes:
                self.code_blocks.append(dedent(m.group("code")))

        return lines


# NOTE(review): neither read nor written by any code visible in this module —
# presumably a leftover; confirm before removing.
context_citations = None


class PyCodeExtension(Extension):
    """Markdown extension that installs a `PyCodePreprocessor`.

    Parameters
    ----------
    code_blocks : list
        List handed to the preprocessor, which appends the collected code to it.
    """

    def __init__(self, code_blocks):
        super().__init__()
        self.code_blocks = code_blocks

    def extendMarkdown(self, md):
        """Register the extension and its preprocessor on ``md``."""
        self.md = md
        md.registerExtension(self)

        collector = PyCodePreprocessor(md, self.code_blocks)
        md.preprocessors.register(collector, "fenced_code", 31)

        # Add this extension to the `mdx` list of every registered
        # mkdocstrings processor (presumably so that the Markdown rendered
        # from docstrings is scanned as well — confirm).
        autodoc_processors = [
            registered
            for registered in md.registeredExtensions
            if isinstance(registered, AutoDocProcessor)
        ]
        for processor in autodoc_processors:
            processor._config["mdx"].append(self)


def makeExtension(*args, **kwargs):
    """Build a `PyCodeExtension`, forwarding all arguments to its constructor."""
    extension = PyCodeExtension(*args, **kwargs)
    return extension


class PyCodeExtractorPlugin(BasePlugin):
    """MkDocs plugin that gathers the Python code blocks of each built page.

    After a build, ``docs_code_blocks`` holds one ``(page url, code)`` tuple
    per page that contained at least one collected block, where ``code`` is
    the page's blocks joined with newlines.
    """

    # This plugin takes no option
    config_scheme: Tuple[Tuple[str, MkType], ...] = ()

    def __init__(self, global_config):
        """
        Parameters
        ----------
        global_config
            The MkDocs configuration; stored as ``global_config``.
        """
        self.global_config = global_config
        # Blocks of the page being rendered; filled by the Markdown extension
        self.page_code_blocks = []
        # (url, code) tuples accumulated over the whole build
        self.docs_code_blocks = []

    def on_config(self, config: MkDocsConfig):
        """Swap the fenced-code extensions for the code-collecting one."""
        self.ext = PyCodeExtension(self.page_code_blocks)
        extensions = config["markdown_extensions"]
        # After pymdownx.highlight, because of weird registering deleting the first
        # extension
        extensions.append(self.ext)
        # Drop the stock extensions only when they are configured, so that a
        # mkdocs.yml without them does not fail with a ValueError.
        for name in ("pymdownx.highlight", "fenced_code"):
            if name in extensions:
                extensions.remove(name)

    def on_pre_build(self, *, config: MkDocsConfig):
        """Request the mkdocstrings ``python`` handler before the build."""
        mkdocstrings_plugin: MkdocstringsPlugin = config.plugins["mkdocstrings"]
        mkdocstrings_plugin.get_handler("python")

    def on_page_content(self, html, page, config, files):
        """Store the blocks collected for ``page`` and return ``html`` as is."""
        if self.page_code_blocks:
            self.docs_code_blocks.append((page.url, "\n".join(self.page_code_blocks)))
            # Start from an empty list for the next page
            self.page_code_blocks.clear()
        return html


def extract_docs_code():
    """Build the documentation and return the Python code found in it.

    The site is built into a temporary directory that is deleted afterwards.

    Returns
    -------
    list
        The ``docs_code_blocks`` of the extractor plugin, i.e. the
        ``(page url, code)`` tuples it gathered during the build.
    """
    config = load_config()

    site_dir = tempfile.mkdtemp()
    try:
        config["site_dir"] = site_dir

        # Plug the code extractor into the build
        extractor = PyCodeExtractorPlugin(config)
        config.plugins["pycode_extractor"] = extractor

        # Wrap the build in the same startup/shutdown events, making sure
        # "shutdown" is fired even when the build fails
        config["plugins"].run_event("startup", command="build", dirty=False)
        try:
            build(config)
        finally:
            config["plugins"].run_event("shutdown")
    finally:
        shutil.rmtree(site_dir, ignore_errors=True)

    return extractor.docs_code_blocks
38 changes: 23 additions & 15 deletions tests/test_docs.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
from itertools import chain
from pathlib import Path

import pytest
from extract_docs_code import extract_docs_code

# Build the documentation once, when this module is imported, and map each
# page URL to the Python code collected from that page.
url_to_code = dict(extract_docs_code())

from edsnlp.utils.blocs import check_md_file

# @pytest.fixture(autouse=True, scope="module")
# def brat_folder():
# yield
# shutil.rmtree("path/to/brat")
def printer(code: str) -> None:
"""
Print a code block with line numbers for easier debugging.

Parameters
----------
code : str
Code block to print.
"""
lines = []
for i, line in enumerate(code.split("\n")):
lines.append(f"{i + 1:03} {line}")

files = chain(
Path("./").glob("*.md"),
Path("docs").glob("**/*.md"),
)
print("\n".join(lines))


# Note the use of `str`, makes for pretty output
@pytest.mark.parametrize("path", files, ids=str)
def test_code_blocks(path):
check_md_file(path=path, memory=True)
@pytest.mark.parametrize("url", sorted(url_to_code), ids=str)
def test_code_blocks(url):
    """Execute the Python code collected from one documentation page."""
    snippet = url_to_code[url]
    try:
        # NOTE(review): the globals passed to `exec` define `__MODULE__`, not
        # `__name__` — confirm this is intended.
        exec(snippet, {"__MODULE__": "__main__"})
    except Exception:
        # Print the numbered source so the failing line is easy to locate
        printer(snippet)
        raise

0 comments on commit 9e64f77

Please sign in to comment.