From e76a1a175bd2651613b61d9c90dd946c9a8756c4 Mon Sep 17 00:00:00 2001 From: "haote.yang" Date: Tue, 11 May 2021 15:28:42 +0800 Subject: [PATCH] docs: add plug-in for dataset management examples auto generation --- docs/plugins/__init__.py | 6 + docs/plugins/example_creator.py | 221 ++++++++++++++++++ docs/source/conf.py | 6 +- docs/source/examples/BSTLD.rst | 173 -------------- docs/source/examples/DogsVsCats.rst | 162 ------------- docs/source/examples/LeedsSportsPose.rst | 162 ------------- docs/source/examples/NeolixOD.rst | 170 -------------- docs/source/examples/Newsgroups20.rst | 179 -------------- docs/source/examples/THCHS30.rst | 160 ------------- docs/source/examples/template.json | 34 +++ .../images/{example-Box2D.png => BSTLD.png} | Bin ...le-Keypoints2D.png => LeedsSportsPose.png} | Bin .../{example-Box3D.png => NeolixOD.png} | Bin 13 files changed, 266 insertions(+), 1007 deletions(-) create mode 100644 docs/plugins/__init__.py create mode 100644 docs/plugins/example_creator.py delete mode 100644 docs/source/examples/BSTLD.rst delete mode 100644 docs/source/examples/DogsVsCats.rst delete mode 100644 docs/source/examples/LeedsSportsPose.rst delete mode 100644 docs/source/examples/NeolixOD.rst delete mode 100644 docs/source/examples/Newsgroups20.rst delete mode 100644 docs/source/examples/THCHS30.rst create mode 100644 docs/source/examples/template.json rename docs/source/images/{example-Box2D.png => BSTLD.png} (100%) rename docs/source/images/{example-Keypoints2D.png => LeedsSportsPose.png} (100%) rename docs/source/images/{example-Box3D.png => NeolixOD.png} (100%) diff --git a/docs/plugins/__init__.py b/docs/plugins/__init__.py new file mode 100644 index 000000000..e7bbc7530 --- /dev/null +++ b/docs/plugins/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Graviti. Licensed under MIT License. +# + +"""plugins.""" diff --git a/docs/plugins/example_creator.py b/docs/plugins/example_creator.py new file mode 100644 index 000000000..14876ddf4 --- /dev/null +++ b/docs/plugins/example_creator.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +# +# Copyright 2021 Graviti. Licensed under MIT License. 
+# + +# pylint: disable=wrong-import-position +# pylint: disable=no-value-for-parameter +# pylint: disable=too-many-instance-attributes + +"""This file generates examples/*.rst with dataset names listed in examples.rst.""" + +import json +import os +import re +import sys +from pathlib import Path +from typing import Any, Dict, List + +REPO_PATH = str(Path(os.path.abspath(__file__)).parents[2]) +sys.path.insert(0, REPO_PATH) + +from tensorbay import GAS # noqa: E402 +from tensorbay.dataset import Segment # noqa: E402 + + +class ExampleCreator: + """Create an dataset management example.""" + + _OPEN_DATASETS_PATH = os.path.join(REPO_PATH, "tensorbay", "opendataset") + _SOURCE_PATH = os.path.join(REPO_PATH, "docs", "source") + _EXAMPLES_PATH = os.path.join(_SOURCE_PATH, "examples") + _IMAGES_PATH = os.path.join(_SOURCE_PATH, "images") + _TEMPLATE_PATH = os.path.join(_EXAMPLES_PATH, "template.json") + _EXAMPLES_RST_PATH = os.path.join(_SOURCE_PATH, "quick_start", "examples.rst") + + _TOPMOST_TITLE_LEVEL = 0 + _EXAMPLE_TITLES = { + "authorization": "Authorize a Client Instance", + "create_dataset": "Create Dataset", + "organize_dataset": "Organize Dataset", + "upload_dataset": "Upload Dataset", + "read_dataset": "Read Dataset", + "read_label": "Read Label", + "delete_dataset": "Delete Dataset", + } + + def __init__(self, access_key: str, name: str, alias: str) -> None: + self._access_key = access_key + self._name = name + self._alias = alias + self._dataset_path = os.path.join(self._OPEN_DATASETS_PATH, self._name) + self._is_catalog_existed = os.path.exists(os.path.join(self._dataset_path, "catalog.json")) + self._dataset_client = GAS(self._access_key).get_dataset(self._name) + self._label_types = self._get_label_types() + self._segment_names = self._dataset_client.list_segment_names() + self._template = self._get_template() + self._content: List[str] = [] + + self._load_content() + + def dump_content(self) -> None: + """Dump the content in an example file.""" + with open(os.path.join(self._EXAMPLES_PATH, f"{self._name}.rst"), "w") as fp: + fp.write("".join(self._content)) + + def _get_template(self) -> Dict[str, Any]: + with open(self._TEMPLATE_PATH, "r") as fp: + template_ = fp.read() + template_name_replaced = template_.replace("", self._name) + template_alias_replaced = template_name_replaced.replace("", self._alias) + template: Dict[str, Any] = json.loads(template_alias_replaced) + return template + + def _load_content(self) -> None: + self._load_top() + self._load_authorization() + self._load_create_dataset() + self._load_organize_dataset() + self._load_upload_dataset() + self._load_read_dataset() + self._load_read_label() + self._load_delete_dataset() + + def _load_top(self) -> None: + self._create_and_add_title(self._alias, self._TOPMOST_TITLE_LEVEL) + lines = self._template[self._name] + self._content.append(lines["head"].replace("", self._organize_label_types())) + if os.path.exists(os.path.join(self._IMAGES_PATH, f"{self._name}.png")): + self._content.append(lines["with-image"]) + else: + self._content.append(lines["without-image"]) + + def _create_and_add_title(self, title: str, title_level: int) -> None: + char = "#" if title_level == 0 else "*" + decorator_line = char * (len(title) + 2) + self._content.append(f"{decorator_line}\n {title} \n{decorator_line}\n\n") + + def _organize_label_types(self) -> str: + if len(self._label_types) == 1: + return self._label_types[0] + + return f'{", ".join(self._label_types[:-1])} and {self._label_types[-1]})' + + def _get_label_types(self) -> 
List[str]: + label_types = self._dataset_client.get_catalog().dumps().keys() + capitalized_types = [type_.capitalize() for type_ in label_types] + for index, type_ in enumerate(capitalized_types): + capitalized_types[index] = type_[:-1] + "D" if re.search(r"\dd", type_) else type_ + return capitalized_types + + def _load_authorization(self) -> None: + self._assemble(self._EXAMPLE_TITLES["authorization"], self._TOPMOST_TITLE_LEVEL + 1) + + def _assemble(self, key: str, title_level: int) -> None: + self._create_and_add_title(key, title_level) + self._content.append(self._template[key]) + + def _load_create_dataset(self) -> None: + self._assemble(self._EXAMPLE_TITLES["create_dataset"], self._TOPMOST_TITLE_LEVEL + 1) + + def _load_organize_dataset(self) -> None: + key = self._EXAMPLE_TITLES["organize_dataset"] + self._create_and_add_title(key, self._TOPMOST_TITLE_LEVEL + 1) + lines = self._template[key] + self._content.append(lines["head"]) + self._content.append( + lines["with catalog"] if self._is_catalog_existed else lines["without catalog"] + ) + self._content.append(self._replace_label_classes(lines["tail"])) + + def _replace_label_classes(self, line: str) -> str: + loader_path = os.path.join(self._dataset_path, "loader.py") + with open(loader_path, "r") as fp: + loader_lines = fp.readlines() + + for loader_line in loader_lines: + if loader_line.startswith("from ...label"): + label_classes = loader_line.split(" import ")[1] + + return line.replace("", label_classes) + + def _load_upload_dataset(self) -> None: + self._assemble(self._EXAMPLE_TITLES["upload_dataset"], self._TOPMOST_TITLE_LEVEL + 1) + + def _load_read_dataset(self) -> None: + key = self._EXAMPLE_TITLES["read_dataset"] + self._create_and_add_title(key, self._TOPMOST_TITLE_LEVEL + 1) + lines = self._template[key] + + self._content.append(self._replace_segments(lines["get_dataset"])) + if len(self._segment_names) > 1: + self._content.append(lines["list_segments"]) + self._content.append(self._replace_read_segment(lines["get_segment"])) + + def _replace_segments(self, line: str) -> str: + segment_number = len(self._segment_names) + if segment_number == 1: + description = line.replace("", "is 1 :ref:`segment") + if self._segment_names[0] == "": + return description.replace("", '``""`` (an empty string)') + + return description.replace("", "``{name}``") + + description = line.replace("", f"are {str(segment_number)} :ref:`segments") + return description.replace("", self._format_segment_names()) + + def _format_segment_names(self) -> str: + description = ", ".join([f"``{name}``" for name in self._segment_names[:-1]]) + return f"{description} and ``{self._segment_names[-1]}``" + + def _replace_read_segment(self, line: str) -> str: + for name in self._segment_names: + segment = Segment(name, self._dataset_client) + data = segment[0] + has_all_types_of_labels = True + for label_type in self._label_types: + if not hasattr(data.label, label_type.lower()): + has_all_types_of_labels = False + break + if has_all_types_of_labels: + return line.replace("", name) + + raise ValueError("No segment has all label types.") + + def _load_read_label(self) -> None: + key = self._EXAMPLE_TITLES["read_label"] + for type_ in self._label_types: + self._content.append(self._template[key]["head"].replace("", type_)) + self._content.append(self._template[key][type_]) + self._content.append(self._template[key]["tail"]) + + def _load_delete_dataset(self) -> None: + self._assemble(self._EXAMPLE_TITLES["delete_dataset"], self._TOPMOST_TITLE_LEVEL + 1) + + 
@classmethod + def generate_examples(cls) -> None: + """Generate example rst files automatically based on the example table in examples.rst.""" + print("Creating dataset management examples...") + for name, alias in cls.get_dataset_names_and_aliases(): + print(f"\t--{name}") + access_key = os.environ["ACCESS_KEY"] + example_creator = cls(access_key, name, alias) + example_creator.dump_content() + + @classmethod + def get_dataset_names_and_aliases(cls) -> List[List[str]]: + """Get all dataset names and aliases in quick_start/examples.rst. + + Returns: + A list contains all the names and aliases. + """ + with open(cls._EXAMPLES_RST_PATH, "r") as fp: + examples_lines = fp.readlines() + valid_lines = filter( + lambda line: line.strip().endswith("Dataset Management"), examples_lines + ) + return [line.split("`")[1].split("/")[1].split(":") for line in valid_lines] + + +if __name__ == "__main__": + ExampleCreator.generate_examples() diff --git a/docs/source/conf.py b/docs/source/conf.py index 336db306f..628ba5d83 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -2,7 +2,7 @@ # # Copyright 2021 Graviti. Licensed under MIT License. # -# pylint: disable=missing-module-docstring,invalid-name +# pylint: disable=missing-module-docstring,invalid-name,wrong-import-position # # This file only contains a selection of the most common options. For a full # list see the documentation: @@ -20,6 +20,7 @@ sys.path.insert(0, str(Path(__file__).parents[2])) +from docs.plugins.example_creator import ExampleCreator # noqa: E402 # -- Project information ----------------------------------------------------- @@ -78,3 +79,6 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ["_static"] + +# -- Operations for dataset management examples generation ------------------ +ExampleCreator.generate_examples() diff --git a/docs/source/examples/BSTLD.rst b/docs/source/examples/BSTLD.rst deleted file mode 100644 index 1ea9246e9..000000000 --- a/docs/source/examples/BSTLD.rst +++ /dev/null @@ -1,173 +0,0 @@ -######## - BSTLD -######## - -This topic describes how to manage the `BSTLD Dataset `_, -which is a dataset with :ref:`reference/label_format:Box2D` label(:numref:`Fig. %s `). - -.. _example-bstld: - -.. figure:: ../images/example-Box2D.png - :scale: 50 % - :align: center - - The preview of a cropped image with labels from "BSTLD". - -***************************** - Authorize a Client Instance -***************************** - -An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Authorize a Client Instance""" - :end-before: """""" - -**************** - Create Dataset -**************** - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Create Dataset""" - :end-before: """""" - -****************** - Organize Dataset -****************** - -It takes the following steps to organize the "BSTLD" dataset by the :class:`~tensorbay.dataset.dataset.Dataset` instance. - -Step 1: Write the Catalog -========================= - -A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which -is typically stored in a json file. - -.. 
literalinclude:: ../../../tensorbay/opendataset/BSTLD/catalog.json - :language: json - :name: BSTLD-catalog - :linenos: - -The only annotation type for "BSTLD" is :ref:`reference/label_format:Box2D`, and there are 13 -:ref:`reference/label_format:category` types and one :ref:`reference/label_format:attributes` type. - -.. important:: - - See :ref:`catalog table ` for more catalogs with different label types. - -Step 2: Write the Dataloader -============================ - -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. - -.. literalinclude:: ../../../tensorbay/opendataset/BSTLD/loader.py - :language: python - :name: BSTLD-dataloader - :linenos: - -See :ref:`Box2D annotation ` for more details. - -.. note:: - - Since the :ref:`BSTLD dataloader ` above is already included in TensorBay, - so it uses relative import. - However, the regular import should be used when writing a new dataloader. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Organize Dataset / regular import""" - :end-at: from tensorbay.label import LabeledBox2D - -There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloader is also feasible. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Organize dataset / import dataloader""" - :end-before: """""" - -.. note:: - - Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. - -.. important:: - - See :ref:`dataloader table ` for dataloaders with different label types. - -**************** - Upload Dataset -**************** - -The organized "BSTLD" dataset can be uploaded to TensorBay for sharing, reuse, etc. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Upload Dataset""" - :end-before: """""" - -Similar with Git, the commit step after uploading can record changes to the dataset as a version. -If needed, do the modifications and commit again. -Please see :ref:`features/version_control:Version Control` for more details. - -************** - Read Dataset -************** - -Now "BSTLD" dataset can be read from TensorBay. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Read Dataset / get dataset""" - :end-before: """""" - -In :ref:`reference/dataset_structure:dataset` "BSTLD", there are three -:ref:`segments `: ``train``, ``test`` and ``additional``. -Get the segment names by listing them all. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Read Dataset / list segment names""" - :end-before: """""" - -Get a segment by passing the required segment name. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Read Dataset / get segment""" - :end-before: """""" - - -In the train :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Read Dataset / get data""" - :end-before: """""" - -In each :ref:`reference/dataset_structure:data`, -there is a sequence of :ref:`reference/label_format:Box2D` annotations, -which can be obtained by index. - -.. 
literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Read Dataset / get label""" - :end-before: """""" - -There is only one label type in "BSTLD" dataset, which is ``box2d``. -The information stored in :ref:`reference/label_format:category` is -one of the names in "categories" list of :ref:`catalog.json `. The information stored -in :ref:`reference/label_format:attributes` is one or several of the attributes in "attributes" list of :ref:`catalog.json `. -See :ref:`reference/label_format:Box2D` label format for more details. - -**************** - Delete Dataset -**************** - -.. literalinclude:: ../../../docs/code/BSTLD.py - :language: python - :start-after: """Delete Dataset""" - :end-before: """""" diff --git a/docs/source/examples/DogsVsCats.rst b/docs/source/examples/DogsVsCats.rst deleted file mode 100644 index e39c4a2e0..000000000 --- a/docs/source/examples/DogsVsCats.rst +++ /dev/null @@ -1,162 +0,0 @@ -############## - Dogs vs Cats -############## - -This topic describes how to manage the `Dogs vs Cats Dataset `_, -which is a dataset with :ref:`reference/label_format:Classification` label. - -***************************** - Authorize a Client Instance -***************************** - -An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Authorize a Client Instance""" - :end-before: """""" - -**************** - Create Dataset -**************** - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Create Dataset""" - :end-before: """""" - -****************** - Organize Dataset -****************** - -It takes the following steps to organize the "Dogs vs Cats" dataset by the :class:`~tensorbay.dataset.dataset.Dataset` instance. - -Step 1: Write the Catalog -========================= - -A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which -is typically stored in a json file. - -.. literalinclude:: ../../../tensorbay/opendataset/DogsVsCats/catalog.json - :language: json - :name: dogsvscats-catalog - :linenos: - -The only annotation type for "Dogs vs Cats" is :ref:`reference/label_format:Classification`, and there are 2 -:ref:`reference/label_format:category` types. - -.. important:: - - See :ref:`catalog table ` for more catalogs with different label types. - -Step 2: Write the Dataloader -============================ - -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. - -.. literalinclude:: ../../../tensorbay/opendataset/DogsVsCats/loader.py - :language: python - :name: dogsvscats-dataloader - :linenos: - -See :ref:`Classification annotation ` for more details. - -.. note:: - - Since the :ref:`Dogs vs Cats dataloader ` above is already included in TensorBay, - so it uses relative import. - However, the regular import should be used when writing a new dataloader. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Organize Dataset / regular import""" - :end-at: from tensorbay.label import Classification - -There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloadert is also feasible. - -.. 
literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Organize dataset / import dataloader""" - :end-before: """""" - -.. note:: - - Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. - -.. important:: - - See :ref:`dataloader table ` for more examples of dataloaders with different label types. - -**************** - Upload Dataset -**************** - -The organized "Dogs vs Cats" dataset can be uploaded to TensorBay for sharing, reuse, etc. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Upload Dataset""" - :end-before: """""" - -Similar with Git, the commit step after uploading can record changes to the dataset as a version. -If needed, do the modifications and commit again. -Please see :ref:`features/version_control:Version Control` for more details. - -************** - Read Dataset -************** - -Now "Dogs vs Cats" dataset can be read from TensorBay. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Read Dataset / get dataset""" - :end-before: """""" - -In :ref:`reference/dataset_structure:dataset` "Dogs vs Cats", there are two -:ref:`segments `: ``train`` and ``test``. -Get the segment names by listing them all. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Read Dataset / list segment names""" - :end-before: """""" - -Get a segment by passing the required segment name. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Read Dataset / get segment""" - :end-before: """""" - -In the train :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Read Dataset / get data""" - :end-before: """""" - -In each :ref:`reference/dataset_structure:data`, -there is a sequence of :ref:`reference/label_format:Classification` annotations, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Read Dataset / get label""" - :end-before: """""" - -There is only one label type in "Dogs vs Cats" dataset, which is ``classification``. The information stored in :ref:`reference/label_format:category` is -one of the names in "categories" list of :ref:`catalog.json `. -See :ref:`reference/label_format:Classification` label format for more details. - -**************** - Delete Dataset -**************** - -.. literalinclude:: ../../../docs/code/DogsVsCats.py - :language: python - :start-after: """Delete Dataset""" - :end-before: """""" diff --git a/docs/source/examples/LeedsSportsPose.rst b/docs/source/examples/LeedsSportsPose.rst deleted file mode 100644 index 27fdfb505..000000000 --- a/docs/source/examples/LeedsSportsPose.rst +++ /dev/null @@ -1,162 +0,0 @@ -################### - Leeds Sports Pose -################### - -This topic describes how to manage the `Leeds Sports Pose Dataset `_, -which is a dataset with :ref:`reference/label_format:Keypoints2D` label(:numref:`Fig. %s `). - -.. _example-leedssportspose: - -.. figure:: ../images/example-Keypoints2D.png - :scale: 80 % - :align: center - - The preview of an image with labels from "Leeds Sports Pose". 
- -***************************** - Authorize a Client Instance -***************************** - -An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Authorize a Client Instance""" - :end-before: """""" - -**************** - Create Dataset -**************** - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Create Dataset""" - :end-before: """""" - -****************** - Organize Dataset -****************** - -It takes the following steps to organize the "Leeds Sports Pose" dataset by the :class:`~tensorbay.dataset.dataset.Dataset` instance. - -Step 1: Write the Catalog -========================= - -A :ref:`reference/dataset_structure:catalog` contains all label information of one dataset, which -is typically stored in a json file. - -.. literalinclude:: ../../../tensorbay/opendataset/LeedsSportsPose/catalog.json - :language: json - :name: LeedsSportsPose-catalog - :linenos: - -The only annotation type for "Leeds Sports Pose" is :ref:`reference/label_format:Keypoints2D`. - -.. important:: - - See :ref:`catalog table ` for more catalogs with different label types. - -Step 2: Write the Dataloader -============================ - -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. - -.. literalinclude:: ../../../tensorbay/opendataset/LeedsSportsPose/loader.py - :language: python - :name: LeedsSportsPose-dataloader - :linenos: - -See :ref:`Keipoints2D annotation ` for more details. - -.. note:: - - Since the :ref:`Leeds Sports Pose dataloader ` above is already included in TensorBay, - so it uses relative import. - However, the regular import should be used when writing a new dataloader. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Organize Dataset / regular import""" - :end-at: from tensorbay.label import LabeledKeypoints2D - -There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloader is also feasible. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Organize dataset / import dataloader""" - :end-before: """""" - -.. note:: - - Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. - -.. important:: - - See :ref:`dataloader table ` for dataloaders with different label types. - -**************** - Upload Dataset -**************** - -The organized "BSTLD" dataset can be uploaded to TensorBay for sharing, reuse, etc. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Upload Dataset""" - :end-before: """""" - -Similar with Git, the commit step after uploading can record changes to the dataset as a version. -If needed, do the modifications and commit again. -Please see :ref:`features/version_control:Version Control` for more details. - -************** - Read Dataset -************** - -Now "Leeds Sports Pose" dataset can be read from TensorBay. - -.. 
literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Read Dataset / get dataset""" - :end-before: """""" - -In :ref:`reference/dataset_structure:dataset` "Leeds Sports Pose", there is one default -:ref:`reference/dataset_structure:segment` ``""`` (empty string). Get it by passing the segment name. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Read Dataset / get segment""" - :end-before: """""" - -In the default :ref:`reference/dataset_structure:segment`, there is a sequence of :ref:`reference/dataset_structure:data`, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Read Dataset / get data""" - :end-before: """""" - -In each :ref:`reference/dataset_structure:data`, -there is a sequence of :ref:`reference/label_format:Keypoints2D` annotations, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Read Dataset / get label""" - :end-before: """""" - -There is only one label type in "Leeds Sports Pose" dataset, which is ``keypoints2d``. The information stored in ``x`` (``y``) is -the x (y) coordinate of one keypoint of one keypoints list. The information stored in ``v`` is -the visible status of one keypoint of one keypoints list. See :ref:`reference/label_format:Keypoints2D` -label format for more details. - -**************** - Delete Dataset -**************** - -.. literalinclude:: ../../../docs/code/LeedsSportsPose.py - :language: python - :start-after: """Delete Dataset""" - :end-before: """""" diff --git a/docs/source/examples/NeolixOD.rst b/docs/source/examples/NeolixOD.rst deleted file mode 100644 index 18abf509b..000000000 --- a/docs/source/examples/NeolixOD.rst +++ /dev/null @@ -1,170 +0,0 @@ -########### - Neolix OD -########### - -This topic describes how to manage the `Neolix OD dataset`_, -which is a dataset with :ref:`reference/label_format:Box3D` label type -(:numref:`Fig. %s `). - -.. _Neolix OD dataset: https://gas.graviti.cn/dataset/graviti-open-dataset/NeolixOD - -.. _example-neolixod: - -.. figure:: ../images/example-Box3D.png - :scale: 50 % - :align: center - - The preview of a point cloud from "Neolix OD" with Box3D labels. - -***************************** - Authorize a Client Instance -***************************** - -An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Authorize a Client Instance""" - :end-before: """""" - -**************** - Create Dataset -**************** - - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Create Dataset""" - :end-before: """""" - -****************** - Organize Dataset -****************** - -It takes the following steps to organize "Neolix OD" dataset by the :class:`~tensorbay.dataset.dataset.Dataset` instance. - -Step 1: Write the Catalog -========================= - -A :ref:`Catalog ` contains all label information of one dataset, -which is typically stored in a json file. - -.. literalinclude:: ../../../tensorbay/opendataset/NeolixOD/catalog.json - :language: json - :name: neolixod-catalog - :linenos: - -The only annotation type for "Neolix OD" is :ref:`reference/label_format:Box3D`, and there are 15 -:ref:`reference/label_format:Category` types and 3 :ref:`reference/label_format:Attributes` types. 
- -.. important:: - - See :ref:`catalog table ` for more catalogs with different label types. - -Step 2: Write the Dataloader -============================ - -A :ref:`reference/glossary:dataloader` is needed to organize the dataset into -a :class:`~tensorbay.dataset.dataset.Dataset` instance. - -.. literalinclude:: ../../../tensorbay/opendataset/NeolixOD/loader.py - :language: python - :name: neolixod-dataloader - :linenos: - -See :ref:`Box3D annotation ` for more details. - -.. note:: - - Since the :ref:`Neolix OD dataloader ` above is already included in TensorBay, - so it uses relative import. - However, the regular import should be used when writing a new dataloader. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Organize Dataset / regular import""" - :end-at: from tensorbay.label import LabeledBox3D - -There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloader is also feasible. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Organize dataset / import dataloader""" - :end-before: """""" - -.. note:: - - Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. - -.. important:: - - See :ref:`dataloader table ` for dataloaders with different label types. - -**************** - Upload Dataset -**************** - -The organized "Neolix OD" dataset can be uploaded to tensorBay for sharing, reuse, etc. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Upload Dataset""" - :end-before: """""" - -Similar with Git, the commit step after uploading can record changes to the dataset as a version. -If needed, do the modifications and commit again. -Please see :ref:`features/version_control:Version Control` for more details. - -************** - Read Dataset -************** - -Now "Neolix OD" dataset can be read from TensorBay. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Read Dataset / get dataset""" - :end-before: """""" - -In :ref:`reference/dataset_structure:Dataset` "Neolix OD", there is only one default -:ref:`Segment `: ``""`` (empty string). -Get a segment by passing the required segment name. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Read Dataset / get segment""" - :end-before: """""" - -In the default :ref:`reference/dataset_structure:Segment`, -there is a sequence of :ref:`reference/dataset_structure:Data`, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Read Dataset / get data""" - :end-before: """""" - -In each :ref:`reference/dataset_structure:Data`, -there is a sequence of :ref:`reference/label_format:Box3D` annotations, - -.. literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Read Dataset / get label""" - :end-before: """""" - -There is only one label type in "Neolix OD" dataset, which is ``box3d``. -The information stored in :ref:`reference/label_format:Category` is -one of the category names in "categories" list of :ref:`catalog.json `. -The information stored in :ref:`reference/label_format:Attributes` -is one of the attributes in "attributes" list of :ref:`catalog.json `. -See :ref:`reference/label_format:Box3D` label format for more details. - -**************** - Delete Dataset -**************** - -.. 
literalinclude:: ../../../docs/code/NeolixOD.py - :language: python - :start-after: """Delete Dataset""" - :end-before: """""" diff --git a/docs/source/examples/Newsgroups20.rst b/docs/source/examples/Newsgroups20.rst deleted file mode 100644 index 3d9a73049..000000000 --- a/docs/source/examples/Newsgroups20.rst +++ /dev/null @@ -1,179 +0,0 @@ -############### - 20 Newsgroups -############### - -This topic describes how to manage the `20 Newsgroups dataset`_, which is a dataset -with :ref:`reference/label_format:Classification` label type. - -.. _20 Newsgroups dataset: https://gas.graviti.cn/dataset/data-decorators/Newsgroups20 - -***************************** - Authorize a Client Instance -***************************** - -An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Authorize a Client Instance""" - :end-before: """""" - -**************** - Create Dataset -**************** - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Create Dataset""" - :end-before: """""" - -****************** - Organize Dataset -****************** - -It takes the following steps to organize the "20 Newsgroups" dataset by -the :class:`~tensorbay.dataset.dataset.Dataset` instance. - -Step 1: Write the Catalog -========================= - -A :ref:`Catalog ` contains all label information of one dataset, -which is typically stored in a json file. - -.. literalinclude:: ../../../tensorbay/opendataset/Newsgroups20/catalog.json - :language: json - :name: Newsgroups20-catalog - :linenos: - -The only annotation type for "20 Newsgroups" is :ref:`reference/label_format:Classification`, -and there are 20 :ref:`reference/label_format:Category` types. - -.. important:: - - See :ref:`catalog table ` for more catalogs with different label types. - -.. note:: - The :ref:`categories` in - :ref:`reference/dataset_structure:Dataset` "20 Newsgroups" have parent-child relationship, - and it use "." to sparate different levels. - -Step 2: Write the Dataloader -============================ - -A :ref:`reference/glossary:Dataloader` is neeeded to organize the dataset into a -:class:`~tensorbay.dataset.dataset.Dataset` instance. - -.. literalinclude:: ../../../tensorbay/opendataset/Newsgroups20/loader.py - :language: python - :name: Newsgroups20-dataloader - :linenos: - -See :ref:`Classification annotation ` for more details. - -.. note:: - - The data in "20 Newsgroups" do not have extensions - so that a "txt" extension is added to the remote path of each data file - to ensure the loaded dataset could function well on TensorBay. - -.. note:: - - Since the :ref:`20 Newsgroups dataloader ` above is already included - in TensorBay, so it uses relative import. However, use regular import should be used when - writing a new dataloader. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Organize Dataset / regular import""" - :end-at: from tensorbay.label import LabeledBox2D - -There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloader is also feasible. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Organize dataset / import dataloader""" - :end-before: """""" - -.. 
note:: - - Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. - -.. important:: - - See :ref:`dataloader table ` for dataloaders with different label types. - -**************** - Upload Dataset -**************** - -The organized "20 Newsgroups" dataset can be uploaded to TensorBay for sharing, reuse, etc. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Upload Dataset""" - :end-before: """""" - -Similar with Git, the commit step after uploading can record changes to the dataset as a version. -If needed, do the modifications and commit again. -Please see :ref:`features/version_control:Version Control` for more details. - -************** - Read Dataset -************** - -Now "20 Newsgroups" dataset can be read from TensorBay. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Read Dataset / get dataset""" - :end-before: """""" - -In :ref:`reference/dataset_structure:Dataset` "20 Newsgroups", there are four -:ref:`Segments `: ``20news-18828``, -``20news-bydate-test`` and ``20news-bydate-train``, ``20_newsgroups``. -Get the segment names by listing them all. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Read Dataset / list segment names""" - :end-before: """""" - -Get a segment by passing the required segment name. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Read Dataset / get segment""" - :end-before: """""" - -In the 20news-18828 :ref:`reference/dataset_structure:Segment`, there is a sequence of :ref:`reference/dataset_structure:Data`, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Read Dataset / get data""" - :end-before: """""" - -In each :ref:`reference/dataset_structure:Data`, -there is a sequence of :ref:`reference/label_format:Classification` annotations, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Read Dataset / get label""" - :end-before: """""" - -There is only one label type in "20 Newsgroups" dataset, which is ``Classification``. -The information stored in :ref:`reference/label_format:Category` is -one of the category names in "categories" list of :ref:`catalog.json `. -See :ref:`this page ` for more details about the -structure of Classification. - -**************** - Delete Dataset -**************** - -.. literalinclude:: ../../../docs/code/Newsgroups20.py - :language: python - :start-after: """Delete Dataset""" - :end-before: """""" diff --git a/docs/source/examples/THCHS30.rst b/docs/source/examples/THCHS30.rst deleted file mode 100644 index 08c079763..000000000 --- a/docs/source/examples/THCHS30.rst +++ /dev/null @@ -1,160 +0,0 @@ -########### - THCHS-30 -########### - -This topic describes how to manage the `THCHS-30 Dataset`_, -which is a dataset with :ref:`reference/label_format:Sentence` label - -.. _THCHS-30 Dataset: https://www.graviti.com/open-datasets/data-decorators/THCHS30 - -***************************** - Authorize a Client Instance -***************************** - -An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay. - -.. 
literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Authorize a Client Instance""" - :end-before: """""" - -**************** - Create Dataset -**************** - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Create Dataset""" - :end-before: """""" - -****************** -Organize Dataset -****************** - -It takes the following steps to organize the “THCHS-30” dataset by the :class:`~tensorbay.dataset.dataset.Dataset` instance. - -Step 1: Write the Catalog -========================= - -A :ref:`Catalog ` contains all label information of one -dataset, which is typically stored in a json file. However the catalog of THCHS-30 is too -large, instead of reading it from json file, we read it by mapping from subcatalog that is -loaded by the raw file. Check the :ref:`dataloader ` below for more details. - -.. important:: - - See :ref:`catalog table ` for more catalogs with different - label types. - -Step 2: Write the Dataloader -============================ - -A :ref:`dataloader ` is needed to organize the dataset -into a :class:`~tensorbay.dataset.dataset.Dataset` instance. - -.. literalinclude:: ../../../tensorbay/opendataset/THCHS30/loader.py - :language: python - :name: THCHS30-dataloader - :linenos: - -See :ref:`Sentence annotation ` for more details. - -.. note:: - Since the :ref:`THCHS-30 dataloader ` above is already included in TensorBay, - so it uses relative import. - However, the regular import should be used when writing a new dataloader. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Organize Dataset / regular import""" - :end-at: from tensorbay.label import LabeledSentence, SentenceSubcatalog, Word - -There are already a number of dataloaders in TensorBay SDK provided by the community. -Thus, instead of writing, importing an available dataloadert is also feasible. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Organize dataset / import dataloader""" - :end-before: """""" - -.. note:: - - Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again. - -.. important:: - - See :ref:`dataloader table ` for dataloaders with different label types. - -**************** -Upload Dataset -**************** - -The organized "THCHS-30" dataset can be uploaded to TensorBay for sharing, reuse, etc. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Upload Dataset""" - :end-before: """""" - -Similar with Git, the commit step after uploading can record changes to the dataset as a version. -If needed, do the modifications and commit again. -Please see :ref:`features/version_control:Version Control` for more details. - -************** -Read Dataset -************** - -Now "THCHS-30" dataset can be read from TensorBay. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Read Dataset / get dataset""" - :end-before: """""" - -In :ref:`reference/dataset_structure:Dataset` "THCHS-30", there are three -:ref:`Segments `: -``dev``, ``train`` and ``test``. -Get the segment names by listing them all. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Read Dataset / list segment names""" - :end-before: """""" - -Get a segment by passing the required segment name. - -.. 
literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Read Dataset / get segment""" - :end-before: """""" - -In the dev :ref:`reference/dataset_structure:Segment`, -there is a sequence of :ref:`reference/dataset_structure:Data`, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Read Dataset / get data""" - :end-before: """""" - -In each :ref:`reference/dataset_structure:Data`, -there is a sequence of :ref:`reference/label_format:Sentence` annotations, -which can be obtained by index. - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Read Dataset / get label""" - :end-before: """""" - -There is only one label type in "THCHS-30" dataset, which is ``Sentence``. It contains -``sentence``, ``spell`` and ``phone`` information. See :ref:`Sentence ` -label format for more details. - -**************** -Delete Dataset -**************** - -.. literalinclude:: ../../../docs/code/THCHS30.py - :language: python - :start-after: """Delete Dataset""" - :end-before: """""" diff --git a/docs/source/examples/template.json b/docs/source/examples/template.json new file mode 100644 index 000000000..ad2dae699 --- /dev/null +++ b/docs/source/examples/template.json @@ -0,0 +1,34 @@ +{ + "": { + "head": "This topic describes how to manage the\n` Dataset >`_,\nwhich is a dataset with annotation", + "with-image": "(:numref:`Fig. %s <-image>`).\n\n.. _-image:\n\n.. figure:: ../images/.png\n :scale: 50 %\n :align: center\n\n The preview of a dataset sample from \"\".\n\n", + "without-image": ".\n\n" + }, + "Authorize a Client Instance": "An :ref:`reference/glossary:accesskey` is needed to authenticate identity when using TensorBay.\n\n.. code:: python\n\n from tensorbay import GAS\n\n ACCESS_KEY = \"Accesskey-*****\"\n gas = GAS(ACCESS_KEY)\n\n", + "Create Dataset": ".. code:: python\n\n gas.create_dataset(\"\")\n\n", + "Organize Dataset": { + "head": "It takes the following steps to organize the \"\" dataset\nby the :class:`~tensorbay.dataset.dataset.Dataset` instance.\n\nStep 1: Write the Catalog\n=========================\n\nA :ref:`reference/dataset_structure:catalog` contains all label information of one dataset,\nwhich is typically stored in a json file.\n\n", + "with catalog": ".. literalinclude:: ../../../tensorbay/opendataset//catalog.json\n :language: json\n :name: dataset-catalog\n :linenos:\n\n", + "without catalog": "However the catalog of is too large, instead of reading it from json file, we read it by mapping from subcatalog that is loaded by the raw file.\n\n", + "tail": ".. important::\n\n See :ref:`catalog table ` for more catalogs with different label types.\n\nStep 2: Write the Dataloader\n============================\n\nA :ref:`reference/glossary:dataloader` is needed to organize the dataset into a :class:`~tensorbay.dataset.dataset.Dataset` instance.\n\n.. literalinclude:: ../../../tensorbay/opendataset//loader.py\n :language: python\n :name: dataloader\n :linenos:\n\n.. note::\n\n Since the :ref:` dataloader ` above is already included in TensorBay,\n so it uses relative import.\n However, the regular import should be used when writing a new dataloader.\n\n.. code:: python\n\n from tensorbay.dataset import Data, Dataset\n from tensorbay.label import \n\nThere are already a number of dataloaders in TensorBay SDK provided by the community.\nThus, instead of writing, importing an available dataloader is also feasible.\n\n.. 
code:: python\n\n from tensorbay.opendataset import \n\n dataset = (\"path/to/dataset/directory\")\n\n.. note::\n\n Note that catalogs are automatically loaded in available dataloaders, users do not have to write them again.\n\n.. important::\n\n See :ref:`dataloader table ` for dataloaders with different label types.\n\n" + }, + "Upload Dataset": "The organized \"\" dataset can be uploaded to TensorBay for sharing, reuse, etc.\n\n.. code:: python\n\n dataset_client = gas.upload_dataset(dataset)\n dataset_client.commit(\"initial commit\")\n\nSimilar with Git, the commit step after uploading can record changes to the dataset as a version.\nIf needed, do the modifications and commit again.\nPlease see :ref:`features/version_control:Version Control` for more details.\n\n", + "Read Dataset": { + "get_dataset": "Now \"\" dataset can be read from TensorBay.\n\n.. code:: python\n\n dataset = Dataset(\"\", gas)\n\nIn :ref:`reference/dataset_structure:dataset` \"\",\nthere `: .", + "list_segments": "\nGet the segment names by listing them all.\n\n.. code:: python\n\n dataset.keys()", + "get_segment": "\n\nGet a segment by passing the required segment name.\n\n.. code:: python\n\n segment = dataset[\"\"]\n\nIn the :ref:`reference/dataset_structure:segment`,\nthere is a sequence of :ref:`reference/dataset_structure:data`,\nwhich can be obtained by index.\n\n.. code:: python\n\n data = segment[0]\n\n" + }, + "Read Label": { + "head": "Read :ref:`reference/label_format:` annotation.\n\n", + "category": "The information stored in :ref:`reference/label_format:category` is one of the names in \"categories\" list of :ref:`catalog.json `.\n", + "attributes": "The information stored in :ref:`reference/label_format:attributes` is one or several of the attributes in \"attributes\" list of :ref:`catalog.json `.\n", + "instance": "The informatin stored in :ref:`reference/label_format:instance` is the instance id of the labeled object.\n", + "Classification": ".. code:: python\n\n category = data.label.classification.category\n\n", + "Box2D": ".. code:: python\n\n label_box2d = data.label.box2d[0]\n category = label_box2d.category\n attributes = label_box2d.attributes\n xmin = label_box2d.xmin\n ymin = label_box2d.ymin\n xmax = label_box2d.xmax\n ymax = label_box2d.ymax\n\n", + "Box3D": ".. code:: python\n\n label_box3d = data.label.box3d[0]\n category = label_box3d.category\n attributes = label_box3d.attributes\n translation = label_box3d.translation\n rotation = label_box3d.rotation\n size = label_box3d.size\n\n", + "Keypoints2D": ".. code:: python\n\n label_keypoints2d = data.label.keypoints2d[0]\n x = data.label.keypoints2d[0][0].x\n y = data.label.keypoints2d[0][0].y\n v = data.label.keypoints2d[0][0].v\n\nThe information stored in ``x`` (``y``) is the x (y) coordinate of one keypoint of one keypoints list.\nThe information stored in ``v`` is the visible status of one keypoint.\n\n", + "Sentence": ".. code:: python\n\n labeled_sentence = data.label.sentence[0]\n sentence = labeled_sentence.sentence\n spell = labeled_sentence.spell\n phone = labeled_sentence.phone\n\n", + "tail": "See :ref:`reference/label_format:Label Format` for more details.\n\n" + }, + "Delete Dataset": ".. 
code:: python\n\n gas.delete_dataset(\"\")\n" +} diff --git a/docs/source/images/example-Box2D.png b/docs/source/images/BSTLD.png similarity index 100% rename from docs/source/images/example-Box2D.png rename to docs/source/images/BSTLD.png diff --git a/docs/source/images/example-Keypoints2D.png b/docs/source/images/LeedsSportsPose.png similarity index 100% rename from docs/source/images/example-Keypoints2D.png rename to docs/source/images/LeedsSportsPose.png diff --git a/docs/source/images/example-Box3D.png b/docs/source/images/NeolixOD.png similarity index 100% rename from docs/source/images/example-Box3D.png rename to docs/source/images/NeolixOD.png
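
Notes on the example-row parsing (not part of the patch itself): ExampleCreator.get_dataset_names_and_aliases() discovers datasets by scanning quick_start/examples.rst for lines ending with "Dataset Management" and splitting the backtick-quoted reference on "/" and ":". The sketch below is a hypothetical dry run of that split chain; the sample row text is an assumption about the table format in examples.rst (that file is not shown in this patch), and only the parsing logic itself is taken from docs/plugins/example_creator.py.

    # Hypothetical dry run of the row parsing in
    # ExampleCreator.get_dataset_names_and_aliases().
    # Assumption: each dataset row in quick_start/examples.rst ends with
    # "Dataset Management" and its backtick-quoted reference reads like
    # "examples/Name:Alias".
    sample_lines = [
        "   * - :ref:`examples/BSTLD:BSTLD` - Dataset Management\n",
        "   * - a row without the marker text is ignored\n",
    ]

    # Same filter and split chain as in docs/plugins/example_creator.py.
    valid_lines = filter(
        lambda line: line.strip().endswith("Dataset Management"), sample_lines
    )
    for line in valid_lines:
        name, alias = line.split("`")[1].split("/")[1].split(":")
        print(name, alias)  # prints: BSTLD BSTLD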
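
Notes on the build-time hook: the conf.py change calls ExampleCreator.generate_examples() on every Sphinx build, and generate_examples() reads os.environ["ACCESS_KEY"], so building the docs now requires a TensorBay AccessKey in the environment. The snippet below is only a sketch of a guarded variant of that hook, under the assumption that key-less local builds should still succeed; it is not what this patch applies, and it relies on conf.py having already put the repository root on sys.path as the patched conf.py does.

    # Sketch only -- a guarded variant of the conf.py hook added in this patch.
    # Assumption: skipping generation when ACCESS_KEY is absent is acceptable
    # for local builds.
    import os

    from docs.plugins.example_creator import ExampleCreator  # noqa: E402

    if "ACCESS_KEY" in os.environ:
        # Same call the patch adds at the bottom of docs/source/conf.py.
        ExampleCreator.generate_examples()
    else:
        print("ACCESS_KEY is not set; skipping dataset management example generation.")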