docs: make most of our API clickable
Showing 9 changed files with 274 additions and 169 deletions.
@@ -0,0 +1,235 @@
# Based on https://github.com/darwindarak/mdx_bib
import os
import re
from bisect import bisect_right
from typing import Tuple

import jedi
import mkdocs.structure.pages
import parso
import regex
from mkdocs.config.config_options import Type as MkType
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin

from docs.scripts.autorefs.plugin import AutorefsPlugin

try:
    from importlib.metadata import entry_points
except ImportError:
    from importlib_metadata import entry_points


from bs4 import BeautifulSoup

BRACKET_RE = re.compile(r"\[([^\[]+)\]")
CITE_RE = re.compile(r"@([\w_:-]+)")
DEF_RE = re.compile(r"\A {0,3}\[@([\w_:-]+)\]:\s*(.*)")
INDENT_RE = re.compile(r"\A\t| {4}(.*)")

HREF_REGEX = (
    r"(?<=<\s*(?:a[^>]*href|img[^>]*src)=)"
    r'(?:"([^"]*)"|\'([^\']*)\'|[ ]*([^ =>]*)(?![a-z]+=))'
)
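# HREF_REGEX captures the value of an href/src attribute whether it is
# double-quoted, single-quoted, or unquoted, so that replace_link below can
# rewrite absolute URLs in all three forms.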
# Maybe find something less specific?
PIPE_REGEX = r"(?<![a-zA-Z0-9._-])eds[.]([a-zA-Z0-9._-]*)(?![a-zA-Z0-9._-])"

HTML_PIPE_REGEX = r"""(?x)
(?<![a-zA-Z0-9._-])
<span[^>]*>eds<\/span>
<span[^>]*>[.]<\/span>
<span[^>]*>([a-zA-Z0-9._-]*)<\/span>
(?![a-zA-Z0-9._-])
"""

CITATION_RE = r"(\[@(?:[\w_:-]+)(?: *, *@(?:[\w_:-]+))*\])"


class ClickableSnippetsPlugin(BasePlugin):
    config_scheme: Tuple[Tuple[str, MkType], ...] = (
        # ("bibtex_file", MkType(str)),  # type: ignore[assignment]
        # ("order", MkType(str, default="unsorted")),  # type: ignore[assignment]
    )

    @mkdocs.plugins.event_priority(1000)
    def on_config(self, config: MkDocsConfig):
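        # Swap the registered autorefs plugin for our patched AutorefsPlugin
        # (from docs/scripts), carrying over its configuration and removing
        # the original plugin's event handlers so they do not run twice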
        for event_name, events in config.plugins.events.items():
            for event in list(events):
                if "autorefs" in str(event):
                    events.remove(event)
        old_plugin = config["plugins"]["autorefs"]
        plugin_config = dict(old_plugin.config)
        plugin = AutorefsPlugin()
        config.plugins["autorefs"] = plugin
        config["plugins"]["autorefs"] = plugin
        plugin.load_config(plugin_config)

    @classmethod
    def get_ep_namespace(cls, ep, namespace):
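        # entry_points() returns an object with a ``select`` method on
        # Python >= 3.10 (and on importlib_metadata), and a plain dict
        # keyed by group name on older versions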
        if hasattr(ep, "select"):
            return ep.select(group=namespace)
        else:  # dict
            return ep.get(namespace, [])

    @mkdocs.plugins.event_priority(-1000)
    def on_post_page(
        self,
        output: str,
        page: mkdocs.structure.pages.Page,
        config: mkdocs.config.Config,
    ):
        """
        1. Replace absolute paths with paths relative to the rendered page.
           This must be performed after all other plugins have run.
        2. Replace component names with links to the component reference.

        Parameters
        ----------
        output: str
            The rendered HTML of the page
        page: mkdocs.structure.pages.Page
            The page being rendered
        config: mkdocs.config.Config
            The MkDocs configuration

        Returns
        -------
        str
            The post-processed HTML
        """

        autorefs: AutorefsPlugin = config["plugins"]["autorefs"]
        ep = entry_points()
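        # Map factory names (e.g. "eds.some_component") to their entry point
        # values ("module:attribute"), merging the spacy and edsnlp factory
        # registries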
        spacy_factories_entry_points = {
            ep.name: ep.value
            for ep in (
                *self.get_ep_namespace(ep, "spacy_factories"),
                *self.get_ep_namespace(ep, "edsnlp_factories"),
            )
        }

        def replace_component(match):
            full_group = match.group(0)
            name = "eds." + match.group(1)
            ep = spacy_factories_entry_points.get(name)
            preceding = output[max(0, match.start(0) - 50) : match.start(0)]
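            # Only linkify names that match a registered factory, and skip
            # occurrences directly preceded by "DEFAULT:" (e.g. default
            # values rendered in parameter tables)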
            if ep is not None and "DEFAULT:" not in preceding:
                try:
                    url = autorefs.get_item_url(ep.replace(":", "."))
                except KeyError:
                    pass
                else:
                    return f"<a href={url}>{name}</a>"
            return full_group

        def replace_link(match):
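            # Rewrite absolute URLs (starting with "/") relative to the
            # page being rendered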
            relative_url = url = match.group(1) or match.group(2) or match.group(3)
            page_url = os.path.join("/", page.file.url)
            if url.startswith("/"):
                relative_url = os.path.relpath(url, page_url)
            return f'"{relative_url}"'

        output = regex.sub(PIPE_REGEX, replace_component, output)
        output = regex.sub(HTML_PIPE_REGEX, replace_component, output)
        output = regex.sub(HREF_REGEX, replace_link, output)

        all_snippets = ""
        all_offsets = []
        all_nodes = []

        soups = []

        # Collect multi-line <code> blocks and convert each one to a plain
        # Python snippet, remembering which HTML node every character maps to
        for match in regex.finditer("<code>.*?</code>", output, flags=regex.DOTALL):
            node = match.group(0)
            if "\n" in node:
                soup, snippet, python_offsets, html_nodes = self.convert_html_to_code(
                    node
                )
                size = len(all_snippets)
                all_snippets += snippet + "\n"
                all_offsets.extend([size + i for i in python_offsets])
                all_nodes.extend(html_nodes)
                soups.append((soup, match.start(0), match.end(0)))

        interpreter = jedi.Interpreter(all_snippets, [{}])
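        # Cumulative offsets of each line start, used to convert jedi's
        # (line, column) positions into flat offsets into all_snippets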
        line_lengths = [0]
        for line in all_snippets.split("\n"):
            line_lengths.append(len(line) + line_lengths[-1] + 1)
        line_lengths[-1] -= 1

        # print(all_snippets)
        # print("----")
        for name in self.iter_names(interpreter._module_node):
            try:
                line, col = name.start_pos
                offset = line_lengths[line - 1] + col
                node_idx = bisect_right(all_offsets, offset) - 1

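                # bisect_right locates the HTML text node whose span of the
                # concatenated snippet contains this name's offset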
                node = all_nodes[node_idx]
                goto = (interpreter.goto(line, col, follow_imports=True) or [None])[0]
                if (
                    goto
                    and goto.full_name
                    and goto.full_name.startswith("edsnlp")
                    and goto.type != "module"
                ):
                    url = autorefs.get_item_url(goto.full_name)
                    # Check that the node is not already nested inside a link
                    if not node.find_parents("a"):
                        node.replace_with(
                            BeautifulSoup(
                                f'<a class="discrete-link" href="{url}">{node}</a>',
                                "html5lib",
                            )
                        )
            except Exception:
                pass
        # print("\n\n")

        # Re-insert soups into the output
        for soup, start, end in reversed(soups):
            output = output[:start] + str(soup) + output[end:]

        return output

    @classmethod
    def iter_names(cls, root):
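        # Recursively yield every parso Name node of the syntax tree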
        if isinstance(root, parso.python.tree.Name):
            yield root
        for child in getattr(root, "children", ()):
            yield from cls.iter_names(child)

    @classmethod
    def convert_html_to_code(
        cls, html_content: str
    ) -> Tuple[BeautifulSoup, str, list, list]:
        pre_html_content = "<pre>" + html_content + "</pre>"
        soup = BeautifulSoup(pre_html_content, "html5lib")
        code_element = soup.find("code")

        line_lengths = [0]
        for line in pre_html_content.split("\n"):
            line_lengths.append(len(line) + line_lengths[-1] + 1)
        line_lengths[-1] -= 1

        python_code = ""
        code_offsets = []
        # html_offsets = [0]  # <pre>
        html_nodes = []
        code_offset = 0

        def extract_text_with_offsets(el):
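            # Walk the DOM in document order, concatenating text nodes into a
            # flat Python snippet while recording each node and its offset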
            nonlocal python_code, code_offset
            for content in el.contents:
                # Recursively process child elements
                if isinstance(content, str):
                    python_code += content
                    code_offsets.append(code_offset)
                    code_offset += len(content)
                    html_nodes.append(content)
                    continue
                extract_text_with_offsets(content)

        extract_text_with_offsets(code_element)
        # html_offsets = html_offsets[1:]

        return soup, python_code, code_offsets, html_nodes

        # print("\nOffset Mapping (Python Index -> HTML Index):")
        # for mapping in offset_mapping:
        #     print(mapping)
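
To give a sense of what the component-linking pass produces, here is a minimal, self-contained sketch of the PIPE_REGEX substitution. The factory name eds.some_component and its documentation URL are invented for illustration; the real plugin resolves URLs through the autorefs plugin and the installed entry points.

import regex

PIPE_REGEX = r"(?<![a-zA-Z0-9._-])eds[.]([a-zA-Z0-9._-]*)(?![a-zA-Z0-9._-])"

# Hypothetical registry mapping factory names to documentation URLs
FACTORY_URLS = {"eds.some_component": "/reference/some_component/"}

def replace_component(match):
    # Re-assemble the factory name and linkify it when it is known
    name = "eds." + match.group(1)
    url = FACTORY_URLS.get(name)
    return f"<a href={url}>{name}</a>" if url else match.group(0)

html = "<p>Add eds.some_component to your pipeline.</p>"
print(regex.sub(PIPE_REGEX, replace_component, html))
# <p>Add <a href=/reference/some_component/>eds.some_component</a> to your pipeline.</p>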