JOJ0 · JOJ0 · Oct 25, 2024 · Oct 14, 2024 · Oct 14, 2024 · Oct 15, 2024
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -64,7 +64,13 @@ We are maintaining `synadm` in our spare time and currently are not sponsored by
 
 We keep track of which Synapse Admin API's `synadm` supports in a set of tables on [API to CLI Mapping](https://synadm.readthedocs.io/en/latest/features.html). The structure of this page follows the layout of the official [Synapse Admin API documentation](https://element-hq.github.io/synapse/latest/usage/administration/admin_api/index.html). Each table represents one main chapter of the Synapse documentation.
 
-In our documentation source, the page is defined by [features.rst](https://github.com/JOJ0/synadm/blob/master/doc/source/features.rst), which contains multiple CSV files, each representing a table. To assist with maintaining this page, we offer a [web scraper tool](https://github.com/JOJ0/synadm/blob/master/scrape_docs.py) that pulls data from the Synapse Admin API docs and creates an initial version of such a CSV table. A basic usage example is
+In our documentation source, the page is defined by [features.rst](https://github.com/JOJ0/synadm/blob/master/doc/source/features.rst), which contains multiple CSV files, each representing a table. To assist with maintaining this page, we offer a [web scraper tool](https://github.com/JOJ0/synadm/blob/master/scrape_docs.py) that pulls data from the Synapse Admin API docs and creates an initial version of such a CSV table. To get started with using the tool, first run this command in the repository:
+
+```
+pip install -e '.[scrape_docs]'
+```
+
+A basic usage example is:
 
 ```
 ./scrape_docs.py -o csv https://element-hq.github.io/synapse/latest/admin_api/rooms.html
@@ -77,12 +83,20 @@ which prints a two-column CSV table containing restructuredText formatted hyperl
 ```
 
 This would directly link to the `USER_ID` argument's documentation of that command.
+
 Linking to an option is also possible:
 
 ```
 :option:`synadm media list -r`
 ```
 
+If there's no `synadm` command for the corresponding item, leave the right
+column empty. If the item is a section that has no real API and only contains
+nested commands (e.g. [delete local media][dellocalmedia]), use the `—` character.
+
+[dellocalmedia]:https://element-hq.github.io/synapse/latest/admin_api/media_admin_api.html#delete-local-media
+
+
 Due to a shortcoming of Sphinx it is currently not possible to link to a plain command (without any option or argument). Also see `scrape_docs.py --help` and the [existing CSV files](https://github.com/JOJ0/synadm/tree/master/doc/source/features).
 
 

diff --git a/scrape_docs.py b/scrape_docs.py
@@ -28,16 +28,26 @@ def scrape(output, url):
     The default output format is "csv", which gives a two column CSV table
     containing restructuredText formatted hyperlinks and a headline.
     '''
+    def get_indentation_levels(heading_tags, heading_tag):
+        """Returns how many indentation levels are required depending on the
+        passed heading tag
+
+        h1 is no indentation,
+        h2 is one indentation level,
+        h3 is two, and so on...
+        """
+        for h in heading_tags:
+            if heading_tag == h:
+                return int(heading_tag[-1]) - 1
+        return 0
+
     chapter = url
     apidoc = requests.get(chapter).text
     soup = BeautifulSoup(apidoc, 'html.parser')
 
     any_heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
     elements = soup.find_all([*any_heading_tag, 'a'],)
 
-    if output == 'csv':
-        print('"Synapse Admin API","synadm command(s)"')
-
     for e in elements:
         if e.name in any_heading_tag and output == 'debug':
             print(f'{e.name}: {e.text}')
@@ -46,18 +56,18 @@ def scrape(output, url):
                 link = e['href']
                 if output == 'debug':
                     print(f'Element text:\t{e.text}\nLink/Anchor:\t{link}')
+                    indent_count = get_indentation_levels(any_heading_tag,
+                                                          e.parent.name)
+                    print(f'Indentations:\t{indent_count}')
                 if output in ['rst', 'csv']:
                     parts = chapter.split('/admin_api/')
                     fulllink = f'{parts[0]}/admin_api/{parts[1]}{link}'
+                    indent_count = get_indentation_levels(any_heading_tag,
+                                                          e.parent.name)
                     spacing = ''
-                    for h in any_heading_tag:
-                        if e.parent.name == h:
-                            # h1 is no spacing (decrease by 1),
-                            # h2 is 2 spaces, h3 is 4....
-                            # two literal spaces are replaced by '|indent| '
-                            spacing_count = int(e.parent.name[-1]) - 1
-                            for val in range(0, spacing_count * 2):
-                                spacing += '|indent| '
+                    for val in range(0, indent_count):
+                        # '|indent| ' represents one indentation level
+                        spacing += '|indent| '
                     rst = f'{spacing}`{e.text} <{fulllink}>`_'
                     if output == 'csv':
                         left_col = f'"{rst}"'