diff --git a/docs/pipelines/index.md b/docs/pipelines/index.md deleted file mode 100644 index c15d22e6d..000000000 --- a/docs/pipelines/index.md +++ /dev/null @@ -1,48 +0,0 @@ -# Pipelines overview - -EDS-NLP provides easy-to-use pipeline components. - -=== "Core" - - | Pipeline | Description | - | ------------------------ | ----------------------------------------------- | - | `eds.normalizer` | Non-destructive input text normalisation | - | `eds.sentences` | Better sentence boundary detection | - | `eds.matcher` | A simple yet powerful entity extractor | - | `eds.terminology` | A simple yet powerful terminology matcher | - | `eds.contextual-matcher` | A conditional entity extractor | - | `eds.endlines` | An unsupervised model to classify each end line | - -=== "Qualifiers" - - See the [Qualifier overview](/pipelines/qualifiers/overview/) for more information. - - --8<-- "docs/pipelines/qualifiers/overview.md:components" - -=== "Miscellaneous" - - --8<-- "docs/pipelines/misc/overview.md:components" - -=== "NER" - - See the [NER overview](/pipelines/ner/overview/) for more information. 
def register_anchor(self, url: str, identifier: str):
    """Record where the anchor `identifier` lives, honouring page priorities.

    The anchor is stored in `self._url_map` as a root-anchored URL of the
    form `/<url>#<identifier>`.

    When the identifier is already mapped to a *different* page, both pages
    are ranked against `self.priority_patterns` (regular expressions applied
    with `re.match` to the root-anchored page URL, fragment excluded). A page
    is ranked by the last pattern it matches; a page matching no pattern ranks
    after every pattern. The new page replaces the existing one only when its
    rank is strictly lower; on a tie the existing mapping is kept.

    Arguments:
        url: The URL of the current page, relative to the site root or already
            root-anchored. Examples: `'foo/bar/'`, `'/foo/index.html'`
        identifier: The HTML anchor (without '#') as a string.
    """

    def priority_index(page_url, ranked_patterns):
        # Walk the patterns from last to first so that a specific pattern
        # listed after a catch-all (e.g. ".*" then "reference") wins.
        for index, pattern in reversed(ranked_patterns):
            if re.match(pattern, page_url):
                return index
        return len(ranked_patterns)

    new_page = os.path.join("/", url)
    new_url = f"{new_page}#{identifier}"

    known_url = self._url_map.get(identifier)
    if known_url is not None:
        # Compare page against page: the stored URL carries a "#fragment",
        # so it must be stripped before checking for a same-page
        # re-registration.
        old_page = os.path.join("/", known_url).split("#", 1)[0]
        if old_page != new_page:
            # Patterns are only needed (and only built) on a real conflict.
            ranked = list(enumerate(self.priority_patterns))
            if priority_index(new_page, ranked) >= priority_index(old_page, ranked):
                return

    self._url_map[identifier] = new_url
""" - url = self._get_item_url(identifier, fallback) - if from_url is not None: - parsed = urlsplit(url) - if not parsed.scheme and not parsed.netloc: - return relative_url(from_url, url) - return url + return self._get_item_url(identifier, fallback) def on_config( self, config: Config, **kwargs @@ -418,10 +414,10 @@ def on_page_content( f"{__name__}: Mapping identifiers to URLs for page {page.file.src_path}" ) for item in page.toc.items: - self.map_urls(page.url, item) + self.map_urls(page, item) return html - def map_urls(self, base_url: str, anchor: AnchorLink) -> None: + def map_urls(self, page: Page, anchor: AnchorLink) -> None: """Recurse on every anchor to map its ID to its absolute URL. This method populates `self.url_map` by side-effect. @@ -430,9 +426,10 @@ def map_urls(self, base_url: str, anchor: AnchorLink) -> None: base_url: The base URL to use as a prefix for each anchor's relative URL. anchor: The anchor to process and to recurse on. """ - self.register_anchor(base_url, anchor.id) + abs_url = os.path.join("/", page.file.url) + self.register_anchor(abs_url, anchor.id) for child in anchor.children: - self.map_urls(base_url, child) + self.map_urls(page, child) def on_post_page( self, output: str, page: Page, **kwargs diff --git a/docs/scripts/griffe_ext.py b/docs/scripts/griffe_ext.py index 9f8fd5af4..8cd495a99 100644 --- a/docs/scripts/griffe_ext.py +++ b/docs/scripts/griffe_ext.py @@ -1,12 +1,24 @@ import ast import importlib import inspect +import logging import sys from typing import Union import astunparse -from griffe import Extension, Object, ObjectNode, get_logger +from griffe import Extension, Object, ObjectNode from griffe.docstrings.dataclasses import DocstringSectionParameters +from griffe.expressions import Expr +from griffe.logger import patch_loggers + + +def get_logger(name): + new_logger = logging.getLogger(name) + new_logger.setLevel("ERROR") + return new_logger + + +patch_loggers(get_logger) logger = get_logger(__name__) @@ -98,5 
# Matches an ``href`` attribute. Exactly one capture group is set per match:
# group 1 holds a double-quoted value, group 2 a single-quoted value and
# group 3 an unquoted value.
HREF_REGEX = r'href=(?:"([^"]*)"|\'([^\']*)\'|[ ]*([^ =>]*)(?![a-z]+=))'


@mkdocs.plugins.event_priority(-1000)
def on_post_page(
    output: str,
    page: mkdocs.structure.pages.Page,
    config: mkdocs.config.Config,
):
    """
    Replace absolute paths with path relative to the rendered page
    This must be performed after all other plugins have run, which is why the
    hook is registered with the lowest priority.

    Only ``href`` values starting with a single ``/`` are rewritten.
    Relative links, links with a scheme and protocol-relative links
    (``//host/...``) are left untouched, apart from being re-emitted with
    double quotes.

    Parameters
    ----------
    output
        The fully rendered HTML of the page.
    page
        The page being rendered; ``page.file.url`` is the base the links are
        made relative to.
    config
        The MkDocs configuration (unused).

    Returns
    -------
    str
        The HTML with site-absolute ``href`` values made page-relative.
    """
    import posixpath  # URLs always use "/" separators, whatever the host OS

    page_url = posixpath.join("/", page.file.url)

    def replace_link(match):
        # Pick the group that actually matched. An empty value (href="") is a
        # legitimate match and must not be confused with "group not set".
        url = next((group for group in match.groups() if group is not None), "")
        if url.startswith("/") and not url.startswith("//"):
            url = posixpath.relpath(url, page_url)
        return f'href="{url}"'

    return re.sub(HREF_REGEX, replace_link, output)