diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a2b0da1..4ccb830 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -64,7 +64,13 @@ We are maintaining `synadm` in our spare time and currently are not sponsored by We keep track of which Synapse Admin API's `synadm` supports in a set of tables on [API to CLI Mapping](https://synadm.readthedocs.io/en/latest/features.html). The structure of this page follows the layout of the official [Synapse Admin API documentation](https://element-hq.github.io/synapse/latest/usage/administration/admin_api/index.html). Each table represents one main chapter of the Synapse documentation. -In our documentation source, the page is defined by [features.rst](https://github.com/JOJ0/synadm/blob/master/doc/source/features.rst), which contains multiple CSV files, each representing a table. To assist with maintaining this page, we offer a [web scraper tool](https://github.com/JOJ0/synadm/blob/master/scrape_docs.py) that pulls data from the Synapse Admin API docs and creates an initial version of such a CSV table. A basic usage example is +In our documentation source, the page is defined by [features.rst](https://github.com/JOJ0/synadm/blob/master/doc/source/features.rst), which contains multiple CSV files, each representing a table. To assist with maintaining this page, we offer a [web scraper tool](https://github.com/JOJ0/synadm/blob/master/scrape_docs.py) that pulls data from the Synapse Admin API docs and creates an initial version of such a CSV table. To get started with using the tool, first run this command in the repository: + +``` +pip install -e '.[scrape_docs]' +``` + +A basic usage example is: ``` ./scrape_docs.py -o csv https://element-hq.github.io/synapse/latest/admin_api/rooms.html @@ -77,12 +83,20 @@ which prints a two-column CSV table containing restructuredText formatted hyperl ``` This would directly link to the `USER_ID` argument's documentation of that command. 
+ Linking to an option is also possible: ``` :option:`synadm media list -r` ``` +If there's no `synadm` command for the corresponding item, leave the right +column empty. If the item is a section that has no real API and is nested with +commands (e.g. [delete local media][dellocalmedia]), use the `—` character. + +[dellocalmedia]:https://element-hq.github.io/synapse/latest/admin_api/media_admin_api.html#delete-local-media + + Due to a shortcoming of Sphinx it is currently not possible to link to a plain command (without any option or argument). Also see `scrape_docs.py --help` and the [existing CSV files](https://github.com/JOJ0/synadm/tree/master/doc/source/features). diff --git a/scrape_docs.py b/scrape_docs.py index 0320082..79255cf 100755 --- a/scrape_docs.py +++ b/scrape_docs.py @@ -28,6 +28,19 @@ def scrape(output, url): The default output format is "csv", which gives a two column CSV table containing restructuredText formatted hyperlinks and a headline. ''' + def get_indentation_levels(heading_tags, heading_tag): + """Returns how many indentation levels are required depending on the + passed heading tag + + h1 is no indentation, + h2 is one indentation level, + h3 is two, and so on... 
+ """ + for h in heading_tags: + if heading_tag == h: + return int(heading_tag[-1]) - 1 + return 0 + chapter = url apidoc = requests.get(chapter).text soup = BeautifulSoup(apidoc, 'html.parser') @@ -35,9 +48,6 @@ def scrape(output, url): any_heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] elements = soup.find_all([*any_heading_tag, 'a'],) - if output == 'csv': - print('"Synapse Admin API","synadm command(s)"') - for e in elements: if e.name in any_heading_tag and output == 'debug': print(f'{e.name}: {e.text}') @@ -46,18 +56,18 @@ def scrape(output, url): link = e['href'] if output == 'debug': print(f'Element text:\t{e.text}\nLink/Anchor:\t{link}') + indent_count = get_indentation_levels(any_heading_tag, + e.parent.name) + print(f'Indentations:\t{indent_count}') if output in ['rst', 'csv']: parts = chapter.split('/admin_api/') fulllink = f'{parts[0]}/admin_api/{parts[1]}{link}' + indent_count = get_indentation_levels(any_heading_tag, + e.parent.name) spacing = '' - for h in any_heading_tag: - if e.parent.name == h: - # h1 is no spacing (decrease by 1), - # h2 is 2 spaces, h3 is 4.... - # two literal spaces are replaced by '|indent| ' - spacing_count = int(e.parent.name[-1]) - 1 - for val in range(0, spacing_count * 2): - spacing += '|indent| ' + for val in range(0, indent_count): + # '|indent| ' represents one indentation level + spacing += '|indent| ' rst = f'{spacing}`{e.text} <{fulllink}>`_' if output == 'csv': left_col = f'"{rst}"'