diff --git a/sphinx_simplepdf/builders/simplepdf.py b/sphinx_simplepdf/builders/simplepdf.py index c4576dd..2ce3350 100644 --- a/sphinx_simplepdf/builders/simplepdf.py +++ b/sphinx_simplepdf/builders/simplepdf.py @@ -1,3 +1,4 @@ +from collections import Counter import os import re from typing import Any, Dict @@ -172,6 +173,37 @@ def _toctree_fix(self, html): for link in links: link["href"] = link["href"].replace(f"{self.app.config.root_doc}.html", "") + # search for duplicates + counts = dict(Counter([str(x).split(">")[0] for x in links])) + duplicates = {key: value for key, value in counts.items() if value > 1} + + if duplicates: + print("found duplicate references in toctree attempting to fix") + + for text, counter in duplicates.items(): + + ref = re.findall("href=\"#.*\"", str(text)) + + # clean href data for searching + cleaned_ref_toc = ref[0].replace("href=\"", "").replace("\"", "") # "#target" + cleaned_ref_target = ref[0].replace("href=\"#", "").replace("\"", "") # "target" + + occurences = soup.find_all('section', attrs={"id": cleaned_ref_target}) + + # rename duplicate references, relies on fact -> order in toc is order of occurrence in document + replace_counter = 0 + + for link in links: + if link["href"] == cleaned_ref_toc: + # edit reference in table of contents + link["href"] = link["href"] + "-" + str(replace_counter + 1) + + # edit target reference + occurences[replace_counter]["id"] = occurences[replace_counter]["id"] + "-" + str( + replace_counter + 1) + + replace_counter += 1 + for heading_tag in ["h1", "h2"]: headings = soup.find_all(heading_tag, class_="") for number, heading in enumerate(headings):