docs: use template to generate rst files in "Examples"

Graviti-AI · Nov 9, 2021 · 1775a0d · 1775a0d
1 parent c57ea4b
commit 1775a0d
Show file tree

Hide file tree

Showing 13 changed files with 641 additions and 201 deletions.
diff --git a/.gitignore b/.gitignore
@@ -71,6 +71,14 @@ instance/
 # Sphinx documentation
 docs/_build/
 
+# Sphinx example rst files which are generated by the template
+docs/source/quick_start/examples/BSTLD.rst
+docs/source/quick_start/examples/DogsVsCats.rst
+docs/source/quick_start/examples/LeedsSportsPose.rst
+docs/source/quick_start/examples/Newsgroups20.rst
+docs/source/quick_start/examples/NeolixOD.rst
+docs/source/quick_start/examples/THCHS30.rst
+
 # PyBuilder
 target/
 

diff --git a/docs/code/LeedsSportsPose.py b/docs/code/LeedsSportsPose.py
@@ -43,6 +43,10 @@
 dataset = Dataset("LeedsSportsPose", gas)
 """"""
 
+"""Read Dataset / list segment names"""
+dataset.keys()
+""""""
+
 """Read Dataset / get segment"""
 segment = dataset[0]
 """"""

diff --git a/docs/code/NeolixOD.py b/docs/code/NeolixOD.py
@@ -42,6 +42,10 @@
 dataset = Dataset("NeolixOD", gas)
 """"""
 
+"""Read Dataset / list segment names"""
+dataset.keys()
+""""""
+
 """Read Dataset / get segment"""
 segment = dataset[0]
 """"""

diff --git a/docs/source/__init__.py b/docs/source/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021 Graviti. Licensed under MIT License.
+#
+
+"""source."""
diff --git a/docs/source/_templates/__init__.py b/docs/source/_templates/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021 Graviti. Licensed under MIT License.
+#
+
+"""template."""
diff --git a/docs/source/_templates/examples.py b/docs/source/_templates/examples.py
@@ -0,0 +1,176 @@
+"""The template for example rst files."""
+
+EXAMPLES_TEMPLATE = '''
+###################
+ {dataset_name}
+###################
+
+This topic describes how to manage the `{dataset_name} Dataset <https://gas.graviti.cn/dataset/
+data-decorators/{file_name}>`_, which is a dataset with
+:ref:`reference/label_format/{label_type}:{label_type}` label
+{figure_description}
+
+*****************************
+ Authorize a Client Instance
+*****************************
+
+An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Authorize a Client Instance"""
+   :end-before: """"""
+
+****************
+ Create Dataset
+****************
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Create Dataset"""
+   :end-before: """"""
+
+******************
+ Organize Dataset
+******************
+
+Normally, ``dataloader.py`` and ``catalog.json`` are required to organize the "{dataset_name}"
+dataset into the :class:`~tensorbay.dataset.dataset.Dataset` instance.
+In this example, they are stored in the same directory like::
+
+    {dataset_name}/
+        catalog.json
+        dataloader.py
+
+Step 1: Write the Catalog
+=========================
+
+A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which
+is typically stored in a json file like ``catalog.json``.
+{catalog_description}
+
+{category_attribute_description}
+
+.. note::
+
+   By passing the path of the ``catalog.json``, :func:`~tensorbay.dataset.dataset.DatasetBase.
+   load_catalog` supports loading the catalog into dataset.
+
+.. important::
+
+   See :ref:`catalog table <reference/dataset_structure:catalog>` for more catalogs with different
+   label types.
+
+Step 2: Write the Dataloader
+============================
+
+A :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.
+dataset.dataset.Dataset` instance.
+
+.. literalinclude:: ../../../../tensorbay/opendataset/{file_name}/loader.py
+   :language: python
+   :name: {file_name}-dataloader
+   :linenos:
+
+See :ref:`{label_type} annotation <reference/label_format/{label_type}:{label_type}>` for more
+details.
+
+There are already a number of dataloaders in TensorBay SDK provided by the community.
+Thus, instead of writing, importing an available dataloader is also feasible.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Organize dataset / import dataloader"""
+   :end-before: """"""
+
+.. note::
+
+   Note that catalogs are automatically loaded in available dataloaders, users do not have to write
+   them again.
+
+.. important::
+
+   See :ref:`dataloader table <reference/glossary:dataloader>` for dataloaders with different label
+   types.
+
+*******************
+ Visualize Dataset
+*******************
+
+Optionally, the organized dataset can be visualized by **Pharos**, which is a TensorBay SDK plug-in.
+This step can help users to check whether the dataset is correctly organized.
+Please see :ref:`features/visualization:Visualization` for more details.
+
+****************
+ Upload Dataset
+****************
+
+The organized "{dataset_name}" dataset can be uploaded to TensorBay for sharing, reuse, etc.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Upload Dataset"""
+   :end-before: """"""
+
+.. note::
+   Set ``skip_uploaded_files=True`` to skip uploaded data.
+   The data will be skipped if its name and segment name are the same as the remote data.
+
+Similar to Git, the commit step after uploading can record changes to the dataset as a version.
+If needed, do the modifications and commit again.
+Please see :ref:`features/version_control/index:Version Control` for more details.
+
+**************
+ Read Dataset
+**************
+
+Now "{dataset_name}" dataset can be read from TensorBay.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Read Dataset / get dataset"""
+   :end-before: """"""
+
+Get the segment names by listing them all.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Read Dataset / list segment names"""
+   :end-before: """"""
+
+Get a segment by passing the required segment name.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Read Dataset / get segment"""
+   :end-before: """"""
+
+In the :ref:`reference/dataset_structure:segment`, there is a sequence of
+:ref:`reference/dataset_structure:data`, which can be obtained by index.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Read Dataset / get data"""
+   :end-before: """"""
+
+In each :ref:`reference/dataset_structure:data`,
+there is a sequence of :ref:`reference/label_format/{label_type}:{label_type}` annotations,
+which can be obtained by index.
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Read Dataset / get label"""
+   :end-before: """"""
+
+There is only one label type in "{dataset_name}" dataset, which is ``{label_type}``.
+{information_description}
+
+****************
+ Delete Dataset
+****************
+
+.. literalinclude:: ../../../../docs/code/{file_name}.py
+   :language: python
+   :start-after: """Delete Dataset"""
+   :end-before: """"""
+'''
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -15,11 +15,14 @@
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 """Configuration file for the Sphinx documentation builder."""
+import os
 import sys
 from pathlib import Path
 
 sys.path.insert(0, str(Path(__file__).parents[2]))
-
+from docs.source._templates.examples import (  # noqa: E402 # pylint: disable=wrong-import-position
+    EXAMPLES_TEMPLATE,
+)
 
 # -- Project information -----------------------------------------------------
 
@@ -79,3 +82,132 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ["_static"]
+
+source_path = os.path.dirname(os.path.abspath(__file__))
+example_path = os.path.join(source_path, "quick_start", "examples")
+dataset_names = (
+    "Dogs Vs Cats",
+    "20 Newsgroups",
+    "BSTLD",
+    "Neolix OD",
+    "Leeds Sports Pose",
+    "THCHS-30",
+)
+label_types = (
+    "Classification",
+    "Classification",
+    "Box2D",
+    "Box3D",
+    "Keypoints2D",
+    "Sentence",
+)
+file_names = ("DogsVsCats", "Newsgroups20", "BSTLD", "NeolixOD", "LeedsSportsPose", "THCHS30")
+
+dataset_with_images = ("BSTLD", "Neolix OD", "Leeds Sports Pose")
+
+figure_description = """(:numref:`Fig. %s <example-{file_name}>`).
+
+.. _example-{file_name}:
+
+.. figure:: ../../images/example-{label_type}.png
+   :scale: 50 %
+   :align: center
+
+   The preview of a cropped image with labels from "{dataset_name}".
+"""
+
+category_attribute_descriptions = {}
+category_attribute_descriptions[
+    "BSTLD"
+] = """
+The only annotation type for "{dataset_name}" is
+:ref:`reference/label_format/{label_type}:{label_type}`, and there are 13
+:ref:`reference/label_format/CommonLabelProperties:category` types and one
+:ref:`reference/label_format/CommonLabelProperties:attributes` type.
+"""
+
+category_attribute_descriptions[
+    "Dogs Vs Cats"
+] = """
+The only annotation type for "{dataset_name}" is
+:ref:`reference/label_format/{label_type}:{label_type}`, and there are 2
+:ref:`reference/label_format/CommonLabelProperties:category` types.
+"""
+
+category_attribute_descriptions[
+    "Leeds Sports Pose"
+] = """
+The only annotation type for "{dataset_name}" is
+:ref:`reference/label_format/{label_type}:{label_type}`.
+"""
+
+category_attribute_descriptions[
+    "Neolix OD"
+] = """
+The only annotation type for "{dataset_name}" is
+:ref:`reference/label_format/{label_type}:{label_type}`, and there are 15
+:ref:`reference/label_format/CommonLabelProperties:category` types and 3
+:ref:`reference/label_format/CommonLabelProperties:attributes` types.
+"""
+
+category_attribute_descriptions[
+    "20 Newsgroups"
+] = """
+The only annotation type for "{dataset_name}" is
+:ref:`reference/label_format/{label_type}:{label_type}`, and there are 20
+:ref:`reference/label_format/CommonLabelProperties:category` types.
+"""
+
+category_attribute_descriptions["THCHS-30"] = ""
+
+# Generate one rst file per example dataset by filling in EXAMPLES_TEMPLATE.
+for dataset_name, label_type, file_name in zip(dataset_names, label_types, file_names):
+    if dataset_name == "THCHS-30":
+        catalog_description = """However, the catalog of THCHS-30 is too large, so instead of
+reading it from a json file, we read it by mapping from the subcatalog that is loaded from
+the raw file. Check the :ref:`dataloader <THCHS30-dataloader>` below for more details.
+"""
+        information_description = """It contains ``sentence``, ``spell`` and ``phone`` information.
+See :ref:`Sentence <reference/label_format/{label_type}:{label_type}>` label format for
+more details.
+"""
+    else:
+        catalog_description = """
+.. literalinclude:: ../../../../tensorbay/opendataset/{file_name}/catalog.json
+   :language: json
+   :name: {file_name}-catalog
+   :linenos:
+"""
+        information_description = """The information stored in
+:ref:`reference/label_format/CommonLabelProperties:category` is one of the names in "categories"
+list of :ref:`catalog.json <{file_name}-catalog>`. The information stored in
+:ref:`reference/label_format/CommonLabelProperties:attributes` is one or several of
+the attributes in "attributes" list of :ref:`catalog.json <{file_name}-catalog>`.
+See :ref:`reference/label_format/{label_type}:{label_type}` label format for more details.
+"""
+
+    if dataset_name in dataset_with_images:
+        figure_description_tmp = figure_description.format(
+            dataset_name=dataset_name, file_name=file_name, label_type=label_type
+        )
+    else:
+        figure_description_tmp = ""
+    catalog_description_tmp = catalog_description.format(file_name=file_name)
+    information_description_tmp = information_description.format(
+        label_type=label_type, file_name=file_name
+    )
+    category_attribute_description = category_attribute_descriptions[dataset_name].format(
+        dataset_name=dataset_name, label_type=label_type
+    )
+    with open(os.path.join(example_path, f"{file_name}.rst"), "w", encoding="utf-8") as fp:
+        fp.write(
+            EXAMPLES_TEMPLATE.format(
+                dataset_name=dataset_name,
+                file_name=file_name,
+                label_type=label_type,
+                figure_description=figure_description_tmp,
+                catalog_description=catalog_description_tmp,
+                category_attribute_description=category_attribute_description,
+                information_description=information_description_tmp,
+            )
+        )