From 6a8ea32b8574bd762f5f6b3a8384ed7d67d044aa Mon Sep 17 00:00:00 2001
From: Jackson <jackson@jacksonchen666.com>
Date: Mon, 14 Oct 2024 11:30:23 +0200
Subject: [PATCH 1/3] make scrape_docs.py csv consistent with format

- ignoring h1
- no indents from h2
---
 scrape_docs.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/scrape_docs.py b/scrape_docs.py
index 0320082..49b2432 100755
--- a/scrape_docs.py
+++ b/scrape_docs.py
@@ -32,12 +32,9 @@ def scrape(output, url):
     apidoc = requests.get(chapter).text
     soup = BeautifulSoup(apidoc, 'html.parser')
 
-    any_heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
+    any_heading_tag = ['h2', 'h3', 'h4', 'h5', 'h6']
     elements = soup.find_all([*any_heading_tag, 'a'],)
 
-    if output == 'csv':
-        print('"Synapse Admin API","synadm command(s)"')
-
     for e in elements:
         if e.name in any_heading_tag and output == 'debug':
             print(f'{e.name}: {e.text}')
@@ -52,10 +49,10 @@ def scrape(output, url):
                     spacing = ''
                     for h in any_heading_tag:
                         if e.parent.name == h:
-                            # h1 is no spacing (decrease by 1),
-                            # h2 is 2 spaces, h3 is 4....
+                            # h2 is no spacing (decrease by 2),
+                            # h3 is 2 spaces, h4 is 4....
                             # two literal spaces are replaced by '|indent| '
-                            spacing_count = int(e.parent.name[-1]) - 1
+                            spacing_count = int(e.parent.name[-1]) - 2
                             for val in range(0, spacing_count * 2):
                                 spacing += '|indent| '
                     rst = f'{spacing}`{e.text} <{fulllink}>`_'

From 612298cf400da8be9cee6257e50bbb26c13286aa Mon Sep 17 00:00:00 2001
From: Jackson <jackson@jacksonchen666.com>
Date: Mon, 14 Oct 2024 11:42:45 +0200
Subject: [PATCH 2/3] CONTRIBUTING.md: add pip install, and handling commands

---
 CONTRIBUTING.md | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a2b0da1..4ccb830 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -64,7 +64,13 @@ We are maintaining `synadm` in our spare time and currently are not sponsored by
 
 We keep track of which Synapse Admin API's `synadm` supports in a set of tables on [API to CLI Mapping](https://synadm.readthedocs.io/en/latest/features.html). The structure of this page follows the layout of the official [Synapse Admin API documentation](https://element-hq.github.io/synapse/latest/usage/administration/admin_api/index.html). Each table represents one main chapter of the Synapse documentation.
 
-In our documentation source, the page is defined by [features.rst](https://github.com/JOJ0/synadm/blob/master/doc/source/features.rst), which contains multiple CSV files, each representing a table. To assist with maintaining this page, we offer a [web scraper tool](https://github.com/JOJ0/synadm/blob/master/scrape_docs.py) that pulls data from the Synapse Admin API docs and creates an initial version of such a CSV table. A basic usage example is
+In our documentation source, the page is defined by [features.rst](https://github.com/JOJ0/synadm/blob/master/doc/source/features.rst), which contains multiple CSV files, each representing a table. To assist with maintaining this page, we offer a [web scraper tool](https://github.com/JOJ0/synadm/blob/master/scrape_docs.py) that pulls data from the Synapse Admin API docs and creates an initial version of such a CSV table. To get started with using the tool, first run this command in the repository:
+
+```
+pip install -e '.[scrape_docs]'
+```
+
+A basic usage example is:
 
 ```
 ./scrape_docs.py -o csv https://element-hq.github.io/synapse/latest/admin_api/rooms.html
@@ -77,12 +83,20 @@ which prints a two-column CSV table containing restructuredText formatted hyperl
 ```
 
 This would directly link to the `USER_ID` argument's documentation of that command.
+
 Linking to an option is also possible:
 
 ```
 :option:`synadm media list -r`
 ```
 
+If there's no `synadm` command for the corresponding item, leave the right
+column empty. If the item is a section that has no real API of its own and only
+groups nested commands (e.g. [delete local media][dellocalmedia]), use the `—` character.
+
+[dellocalmedia]:https://element-hq.github.io/synapse/latest/admin_api/media_admin_api.html#delete-local-media
+
+
 Due to a shortcoming of Sphinx it is currently not possible to link to a plain command (without any option or argument). Also see `scrape_docs.py --help` and the [existing CSV files](https://github.com/JOJ0/synadm/tree/master/doc/source/features).
 
 

From bc60b173c85adf85fe1f0ab65f1c2609bcb90a59 Mon Sep 17 00:00:00 2001
From: J0J0 Todos <jojo@peek-a-boo.at>
Date: Tue, 15 Oct 2024 08:18:43 +0200
Subject: [PATCH 3/3] scrape_docs: Fix h1 exclusion and refactor

- h1 tag shouldn't be excluded from the output
- Refactor for readability: Add a separate function that determines how
  many indentation levels are required.
---
 scrape_docs.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/scrape_docs.py b/scrape_docs.py
index 49b2432..79255cf 100755
--- a/scrape_docs.py
+++ b/scrape_docs.py
@@ -28,11 +28,24 @@ def scrape(output, url):
     The default output format is "csv", which gives a two column CSV table
     containing restructuredText formatted hyperlinks and a headline.
     '''
+    def get_indentation_levels(heading_tags, heading_tag):
+        """Returns how many indentation levels are required depending on the
+        passed heading tag
+
+        h1 is no indentation,
+        h2 is one indentation level,
+        h3 is two, and so on...
+        """
+        for h in heading_tags:
+            if heading_tag == h:
+                return int(heading_tag[-1]) - 1
+        return 0
+
     chapter = url
     apidoc = requests.get(chapter).text
     soup = BeautifulSoup(apidoc, 'html.parser')
 
-    any_heading_tag = ['h2', 'h3', 'h4', 'h5', 'h6']
+    any_heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
     elements = soup.find_all([*any_heading_tag, 'a'],)
 
     for e in elements:
@@ -43,18 +56,18 @@ def scrape(output, url):
                 link = e['href']
                 if output == 'debug':
                     print(f'Element text:\t{e.text}\nLink/Anchor:\t{link}')
+                    indent_count = get_indentation_levels(any_heading_tag,
+                                                          e.parent.name)
+                    print(f'Indentations:\t{indent_count}')
                 if output in ['rst', 'csv']:
                     parts = chapter.split('/admin_api/')
                     fulllink = f'{parts[0]}/admin_api/{parts[1]}{link}'
+                    indent_count = get_indentation_levels(any_heading_tag,
+                                                          e.parent.name)
                     spacing = ''
-                    for h in any_heading_tag:
-                        if e.parent.name == h:
-                            # h2 is no spacing (decrease by 2),
-                            # h3 is 2 spaces, h4 is 4....
-                            # two literal spaces are replaced by '|indent| '
-                            spacing_count = int(e.parent.name[-1]) - 2
-                            for val in range(0, spacing_count * 2):
-                                spacing += '|indent| '
+                    for val in range(0, indent_count):
+                        # '|indent| ' represents one indentation level
+                        spacing += '|indent| '
                     rst = f'{spacing}`{e.text} <{fulllink}>`_'
                     if output == 'csv':
                         left_col = f'"{rst}"'