From 38c0bc2dd02e0aef4b6f9ae64319c818dbf78ecc Mon Sep 17 00:00:00 2001
From: Jinyu-W <53509467+Jinyu-W@users.noreply.github.com>
Date: Wed, 30 Sep 2020 10:58:46 +0800
Subject: [PATCH] doc refined (#122)

* doc refined

* trailing space removed

Co-authored-by: Jinyu Wang <Wang.Jinyu@microsoft.com>
---
 docs/source/conf.py                           |   2 +-
 docs/source/index.rst                         |   2 +-
 .../grass_cluster_provisioning_on_azure.rst   |  88 ++++----
 .../k8s_cluster_provisioning_on_azure.rst     |  77 +++----
 docs/source/installation/pip_install.rst      |  56 ++---
 docs/source/installation/playground.rst       |   7 +-
 .../source/key_components/business_engine.rst |   3 -
 docs/source/key_components/communication.rst  |  37 ++--
 docs/source/key_components/data_model.rst     | 209 ++++++++----------
 .../key_components/distributed_toolkit.rst    |  12 +-
 docs/source/key_components/event_buffer.rst   |   7 +-
 docs/source/key_components/orchestration.rst  |  13 +-
 docs/source/key_components/rl_toolkit.rst     | 100 +++------
 .../key_components/simulation_toolkit.rst     |   6 +-
 docs/source/scenarios/citi_bike.rst           |  20 +-
 .../container_inventory_management.rst        |  21 +-
 16 files changed, 256 insertions(+), 404 deletions(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 751c0a194..29ea9cc92 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -40,7 +40,7 @@
               "sphinx.ext.coverage",
               "sphinx.ext.napoleon",
               "sphinx.ext.viewcode",
-              "sphinx_markdown_tables"
+              "sphinx_markdown_tables",
               ]
 
 napoleon_google_docstring = True
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 261a14836..9582d8d12 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,7 +10,7 @@ Multi-Agent Resource Optimization (MARO) platform is an instance of Reinforcemen
 learning as a Service (RaaS) for real-world resource optimization. It can be
 applied to many important industrial domains, such as container inventory
 management in logistics, bike repositioning in transportation, virtual machine
-provisioning in data centers, and asset management in finance. Besides 
+provisioning in data centers, and asset management in finance. Besides
 `Reinforcement Learning <https://www.andrew.cmu.edu/course/10-703/textbook/BartoSutton.pdf>`_ (RL), it
 also supports other planning/decision mechanisms, such as
 `Operations Research <https://en.wikipedia.org/wiki/Operations_research>`_.
diff --git a/docs/source/installation/grass_cluster_provisioning_on_azure.rst b/docs/source/installation/grass_cluster_provisioning_on_azure.rst
index ac895b05f..39b05ff40 100644
--- a/docs/source/installation/grass_cluster_provisioning_on_azure.rst
+++ b/docs/source/installation/grass_cluster_provisioning_on_azure.rst
@@ -9,7 +9,6 @@ on Azure and run your training job in a distributed environment.
 Prerequisites
 -------------
 
-
 * `Install the Azure CLI and login <https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest>`_
 * `Install docker <https://docs.docker.com/engine/install/>`_ and
   `Configure docker to make sure it can be managed as a non-root user <https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user>`_
@@ -17,103 +16,92 @@ Prerequisites
 Cluster Management
 ------------------
 
-
 * Create a cluster with a `deployment <#grass-azure-create>`_
 
-.. code-block:: sh
-
-   # Create a grass cluster with a grass-create deployment
-   maro grass create ./grass-azure-create.yml
+  .. code-block:: sh
 
+    # Create a grass cluster with a grass-create deployment
+    maro grass create ./grass-azure-create.yml
 
 * Scale the cluster
 
-.. code-block:: sh
+  .. code-block:: sh
 
-   # Scale nodes with 'Standard_D4s_v3' specification to 2
-   maro grass node scale my_grass_cluster Standard_D4s_v3 2
-
-Check `VM Size <https://docs.microsoft.com/en-us/azure/virtual-machines/sizes>`_
-to see more node specifications.
+    # Scale nodes with 'Standard_D4s_v3' specification to 2
+    maro grass node scale my_grass_cluster Standard_D4s_v3 2
 
+  Check `VM Size <https://docs.microsoft.com/en-us/azure/virtual-machines/sizes>`_
+  to see more node specifications.
 
 * Delete the cluster
 
-.. code-block:: sh
-
-   # Delete a grass cluster
-   maro grass delete my_grass_cluster
+  .. code-block:: sh
 
+    # Delete a grass cluster
+    maro grass delete my_grass_cluster
 
 * Start/stop nodes to save costs
 
-.. code-block:: sh
+  .. code-block:: sh
 
-   # Start 2 nodes with 'Standard_D4s_v3' specification
-   maro grass node start my_grass_cluster Standard_D4s_v3 2
+    # Start 2 nodes with 'Standard_D4s_v3' specification
+    maro grass node start my_grass_cluster Standard_D4s_v3 2
 
-   # Stop 2 nodes with 'Standard_D4s_v3' specification
-   maro grass node stop my_grass_cluster Standard_D4s_v3 2
+    # Stop 2 nodes with 'Standard_D4s_v3' specification
+    maro grass node stop my_grass_cluster Standard_D4s_v3 2
 
 Run Job
 -------
 
-
 * Push your training image
 
-.. code-block:: sh
-
-   # Push image 'my_image' to the cluster
-   maro grass image push my_grass_cluster --image-name my_image
+  .. code-block:: sh
 
+    # Push image 'my_image' to the cluster
+    maro grass image push my_grass_cluster --image-name my_image
 
 * Push your training data
 
-.. code-block:: sh
-
-   # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster
-   # You can then assign your mapping location in the start-job deployment
-   maro grass data push my_grass_cluster ./my_training_data/* /my_training_data
+  .. code-block:: sh
 
+    # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster
+    # You can then assign your mapping location in the start-job deployment
+    maro grass data push my_grass_cluster ./my_training_data/* /my_training_data
 
 * Start a training job with a `deployment <#grass-start-job>`_
 
-.. code-block:: sh
-
-   # Start a training job with a start-job deployment
-   maro grass job start my_grass_cluster ./grass-start-job.yml
+  .. code-block:: sh
 
+    # Start a training job with a start-job deployment
+    maro grass job start my_grass_cluster ./grass-start-job.yml
 
 * Or, schedule batch jobs with a `deployment <#grass-start-schedule>`_
 
-.. code-block:: sh
-
-   # Start a training schedule with a start-schedule deployment
-   maro grass schedule start my_grass_cluster ./grass-start-schedule.yml
+  .. code-block:: sh
 
+    # Start a training schedule with a start-schedule deployment
+    maro grass schedule start my_grass_cluster ./grass-start-schedule.yml
 
 * Get the logs of the job
 
-.. code-block:: sh
-
-   # Get the logs of the job
-   maro grass job logs my_grass_cluster my_job_1
+  .. code-block:: sh
 
+    # Get the logs of the job
+    maro grass job logs my_grass_cluster my_job_1
 
 * List the current status of the job
 
-.. code-block:: sh
-
-   # List the current status of the job
-   maro grass job list my_grass_cluster
+  .. code-block:: sh
 
+    # List the current status of the job
+    maro grass job list my_grass_cluster
 
 * Stop a training job
 
-.. code-block:: sh
+  .. code-block:: sh
 
-   # Stop a training job
-   maro grass job stop my_job_1
+    # Stop a training job
+    maro grass job stop my_grass_cluster my_job_1
 
 Sample Deployments
 ------------------
diff --git a/docs/source/installation/k8s_cluster_provisioning_on_azure.rst b/docs/source/installation/k8s_cluster_provisioning_on_azure.rst
index d3182e047..26dbdd664 100644
--- a/docs/source/installation/k8s_cluster_provisioning_on_azure.rst
+++ b/docs/source/installation/k8s_cluster_provisioning_on_azure.rst
@@ -9,7 +9,6 @@ on Azure and run your training job in a distributed environment.
 Prerequisites
 -------------
 
-
 * `Install the Azure CLI and login <https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest>`_
 * `Install and set up kubectl <https://kubernetes.io/docs/tasks/tools/install-kubectl/>`_
 * `Install docker <https://docs.docker.com/engine/install/>`_ and
@@ -18,92 +17,82 @@ Prerequisites
 Cluster Management
 ------------------
 
-
 * Create a cluster with a `deployment <#k8s-azure-create>`_
 
-.. code-block:: sh
-
-   # Create a k8s cluster
-   maro k8s create ./k8s-azure-create.yml
+  .. code-block:: sh
 
+    # Create a k8s cluster
+    maro k8s create ./k8s-azure-create.yml
 
 * Scale the cluster
 
-.. code-block:: sh
+  .. code-block:: sh
 
-   # Scale nodes with 'Standard_D4s_v3' specification to 2
-   maro k8s node scale my_k8s_cluster Standard_D4s_v3 2
-
-Check `VM Size <https://docs.microsoft.com/en-us/azure/virtual-machines/sizes>`_
-to see more node specifications.
+    # Scale nodes with 'Standard_D4s_v3' specification to 2
+    maro k8s node scale my_k8s_cluster Standard_D4s_v3 2
 
+  Check `VM Size <https://docs.microsoft.com/en-us/azure/virtual-machines/sizes>`_
+  to see more node specifications.
 
 * Delete the cluster
 
-.. code-block:: sh
+  .. code-block:: sh
 
-   # Delete a k8s cluster
-   maro k8s delete my_k8s_cluster
+    # Delete a k8s cluster
+    maro k8s delete my_k8s_cluster
 
 Run Job
 -------
 
-
 * Push your training image
 
-.. code-block:: sh
-
-   # Push image 'my_image' to the cluster
-   maro k8s image push my_k8s_cluster --image-name my_image
+  .. code-block:: sh
 
+    # Push image 'my_image' to the cluster
+    maro k8s image push my_k8s_cluster --image-name my_image
 
 * Push your training data
 
-.. code-block:: sh
-
-   # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster
-   # You can then assign your mapping location in the start-job deployment
-   maro k8s data push my_k8s_cluster ./my_training_data/* /my_training_data
+  .. code-block:: sh
 
+    # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster
+    # You can then assign your mapping location in the start-job deployment
+    maro k8s data push my_k8s_cluster ./my_training_data/* /my_training_data
 
 * Start a training job with a `deployment <#k8s-start-job>`_
 
-.. code-block:: sh
-
-   # Start a training job with a start-job deployment
-   maro k8s job start my_k8s_cluster ./k8s-start-job.yml
+  .. code-block:: sh
 
+    # Start a training job with a start-job deployment
+    maro k8s job start my_k8s_cluster ./k8s-start-job.yml
 
 * Or, schedule batch jobs with a `deployment <#k8s-start-schedule>`_
 
-.. code-block:: sh
-
-   # Start a training schedule with a start-schedule deployment
-   maro k8s schedule start my_k8s123_cluster ./k8s-start-schedule.yml
+  .. code-block:: sh
 
+    # Start a training schedule with a start-schedule deployment
+    maro k8s schedule start my_k8s_cluster ./k8s-start-schedule.yml
 
 * Get the logs of the job
 
-.. code-block:: sh
-
-   # Logs will be exported to current directory
-   maro k8s job logs my_k8s_cluster my_job_1
+  .. code-block:: sh
 
+    # Logs will be exported to current directory
+    maro k8s job logs my_k8s_cluster my_job_1
 
 * List the current status of the job
 
-.. code-block:: sh
-
-   # List current status of jobs
-   maro k8s job list my_k8s_cluster my_job_1
+  .. code-block:: sh
 
+    # List current status of jobs
+    maro k8s job list my_k8s_cluster my_job_1
 
 * Stop a training job
 
-.. code-block:: sh
+  .. code-block:: sh
 
-   # Stop a training job
-   maro k8s job stop my_k8s_cluster my_job_1
+    # Stop a training job
+    maro k8s job stop my_k8s_cluster my_job_1
 
 Sample Deployments
 ------------------
diff --git a/docs/source/installation/pip_install.rst b/docs/source/installation/pip_install.rst
index 6796a9e16..3616a9e86 100644
--- a/docs/source/installation/pip_install.rst
+++ b/docs/source/installation/pip_install.rst
@@ -5,31 +5,25 @@ Package
 Install MARO from `PyPI <https://pypi.org/project/pymaro/#files>`_
 ----------------------------------------------------------------------
 
-
-* 
-  Max OS / Linux
+* Mac OS / Linux
 
   .. code-block:: sh
 
-     pip install pymaro
+    pip install pymaro
 
-* 
-  Windows
+* Windows
 
-  .. code-block:: powershell
+  .. code-block::
 
-     # Install torch first, if you don't have one.
-     pip install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
+    # Install torch first, if you don't have one.
+    pip install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
 
-     pip install pymaro
+    pip install pymaro
 
 Install MARO from Source (\ `Editable Mode <https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs>`_\ )
 ------------------------------------------------------------------------------------------------------------------------
 
-
-* 
-  Prerequisites
-
+* Prerequisites
 
   * `Python >= 3.6, < 3.8 <https://www.python.org/downloads/>`_
   * C++ Compiler
@@ -37,12 +31,9 @@ Install MARO from Source (\ `Editable Mode <https://pip.pypa.io/en/stable/refere
     * Linux or Mac OS X: ``gcc``
     * Windows: `Build Tools for Visual Studio 2017 <https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=BuildTools&rel=15>`_
 
-* 
-  Enable Virtual Environment
-
+* Enable Virtual Environment
 
-  * 
-    Mac OS / Linux
+  * Mac OS / Linux
 
     .. code-block:: sh
 
@@ -50,31 +41,26 @@ Install MARO from Source (\ `Editable Mode <https://pip.pypa.io/en/stable/refere
        python -m venv maro_venv
        source ./maro_venv/bin/activate
 
-  * 
-    Windows
+  * Windows
 
     .. code-block:: powershell
 
-       # If your environment is not clean, create a virtual environment firstly.
-       python -m venv maro_venv
-       .\maro_venv\Scripts\activate
-
-* 
-  Install MARO
+      # If your environment is not clean, create a virtual environment firstly.
+      python -m venv maro_venv
+      .\maro_venv\Scripts\activate
 
+* Install MARO
 
-  * 
-    Mac OS / Linux
+  * Mac OS / Linux
 
     .. code-block:: sh
 
-       # Install MARO from source.
-       bash scripts/install_maro.sh
+      # Install MARO from source.
+      bash scripts/install_maro.sh
 
-  * 
-    Windows
+  * Windows
 
     .. code-block:: powershell
 
-       # Install MARO from source.
-       .\scripts\install_maro.bat
+      # Install MARO from source.
+      .\scripts\install_maro.bat
diff --git a/docs/source/installation/playground.rst b/docs/source/installation/playground.rst
index aecca308a..34ffc33f6 100644
--- a/docs/source/installation/playground.rst
+++ b/docs/source/installation/playground.rst
@@ -16,9 +16,7 @@ Pull from `Docker Hub <https://hub.docker.com/repository/registry-1.docker.io/ar
 Run from Source
 ---------------
 
-
-* 
-  Mac OS / Linux
+* Mac OS / Linux
 
   .. code-block:: sh
 
@@ -31,8 +29,7 @@ Run from Source
      # Jupyter lab with maro -> http://127.0.0.1:40011
      docker run -p 40009:40009 -p 40010:40010 -p 40011:40011 maro/playground:cpu
 
-* 
-  Windows
+* Windows
 
   .. code-block::
 
diff --git a/docs/source/key_components/business_engine.rst b/docs/source/key_components/business_engine.rst
index f68e2f6b0..7145fb961 100644
--- a/docs/source/key_components/business_engine.rst
+++ b/docs/source/key_components/business_engine.rst
@@ -8,7 +8,6 @@ engines should be bind to it.
 
 The business engine is responsible for defining:
 
-
 * **Business instance**. Generally, the business instances are the resource
   holders in the business logic. For example:
 
@@ -47,12 +46,10 @@ driven by these business events. In short, with the uniformed business engine
 interface, the simulation of different business scenarios is only based on the
 pluggable business engine (scenario-specific).
 
-
 .. image:: ../images/simulator/business_engine.svg
    :target: ../images/simulator/business_engine.svg
    :alt: Business Engine
 
-
 Generally, the business time series data is read from the historical log or
 generated by a data generation model. Currently, for topologies in Citi Bike
 scenario, data processing is needed before starting the simulation. You can find
diff --git a/docs/source/key_components/communication.rst b/docs/source/key_components/communication.rst
index 7787f2332..fa6926af1 100644
--- a/docs/source/key_components/communication.rst
+++ b/docs/source/key_components/communication.rst
@@ -16,12 +16,10 @@ the underlying driver is pluggable based on the real requirements.
 Currently, we use `ZeroMQ <https://zeromq.org/>`_ as the default choice.
 Proxy also provides support for peer discovering based on `Redis <https://redis.io/>`_.
 
-
 .. image:: ../images/distributed/proxy.svg
    :target: ../images/distributed/proxy.svg
    :alt: Proxy
 
-
 Message
 ^^^^^^^
 
@@ -29,7 +27,6 @@ Message is designed for general purpose,
 it is used to package the communication content between components.
 The main attributes of a message instance include:
 
-
 * ``tag``\ : A customized attribute, it can be used to implement the auto-dispatching logic
   with a `conditional event register table <#conditional-event-register-table>`_.
 * ``source``\ : The alias of the message sender.
@@ -53,7 +50,6 @@ Session Message
 
 We provide two kinds of predefined session types for common distributed scenarios:
 
-
 * **Task Session**\ : It is used to describe a computing task sent from master to worker.
   Three stages are included:
 
@@ -92,7 +88,6 @@ both blocking and non-blocking cases. These primitives are decoupled from
 the underlying implementation of the communication driver (protocol).
 The main primitives are listed below:
 
-
 * ``send``\ : Unicast. It is a blocking, one-to-one sending mode.
   It will watch and collect the reply message from the remote peer.
 * ``isend``\ : The non-blocking version of the ``send``.
@@ -119,18 +114,15 @@ By registering the ``conditional event`` and related ``handler function`` to
 the register table, the handler function will be automatically executed
 with the received messages when the event conditions are met.
 
-
 .. image:: ../images/distributed/register_table.register.svg
    :target: ../images/distributed/register_table.register.svg
    :alt: Register Table
 
-
 ``Conditional event`` is used to declare the required message group for
 auto-triggering the related handler function.
 The unit event is the minimal component in the conditional event,
 it follows a three-stage format: `source`:`tag`:\ ``amount``.
 
-
 * ``source``\ : It is used to declare the required message source.
   The longest-prefix matching is supported.
 
@@ -145,45 +137,42 @@ it follows a three-stage format: `source`:`tag`:\ ``amount``.
 
   * ``%`` is used to represent the relative percentages, such as 60%, 10%, etc.
 
-.. code-block:: python
+  .. code-block:: python
 
-   unit_event_abs = "worker:update:10"
+    unit_event_abs = "worker:update:10"
 
-   unit_event_rel = "worker:update:60%"
+    unit_event_rel = "worker:update:60%"
 
 To support more complex business logic,
 we provide two operations: ``AND`` and ``OR`` to combine unit events up:
 
-
 * ``AND``\ : Valid for multiple unit events and combined unit events.
   The combined event condition is met if all the conditions of the sub-events are met.
 * ``OR``\ : Valid for multiple unit events and combined unit events.
   The combined event condition is met if any sub-event meets the condition.
 
-.. code-block:: python
+  .. code-block:: python
 
-   combined_event_and = ("worker_01:update:2",
-                         "worker_02:update:3",
-                         "AND")
+    combined_event_and = ("worker_01:update:2",
+                          "worker_02:update:3",
+                          "AND")
 
-   combined_event_or = ("worker_03:update:1",
-                        "worker_04:update:5",
-                        "OR")
+    combined_event_or = ("worker_03:update:1",
+                         "worker_04:update:5",
+                         "OR")
 
-   combined_event_mix = (("worker_01:update:2", "worker_02:update:3", "AND"),
-                         "worker_03:update:1",
-                         "OR")
+    combined_event_mix = (("worker_01:update:2", "worker_02:update:3", "AND"),
+                          "worker_03:update:1",
+                          "OR")
 
 ``Handler function`` is a user-defined callback function that is bind to
 a specific conditional event. When the condition of the event is met,
 the related messages will be sent to the handler function for its execution.
 
-
 .. image:: ../images/distributed/register_table.trigger.svg
    :target: ../images/distributed/register_table.trigger.svg
    :alt: Register Table
 
-
 .. code-block:: python
 
    # A common handler function signature
diff --git a/docs/source/key_components/data_model.rst b/docs/source/key_components/data_model.rst
index 3b96878e9..5bcaff3d7 100644
--- a/docs/source/key_components/data_model.rst
+++ b/docs/source/key_components/data_model.rst
@@ -11,15 +11,13 @@ implementation based on their real performance requirement and device limitation
 Key Concepts
 ------------
 
-
 .. image:: ../images/simulator/key_concepts.svg
    :target: ../images/simulator/key_concepts.svg
    :alt: Key Concepts
-
+   :width: 220
 
 As shown in the figure above, there are some key concepts in the data model:
 
-
 * **Node** is the abstraction of the resource holder, which is usually the major
   business instance of the scenario (i.e. vessels and ports in CIM scenario). A
   node usually has various attributes to present the business nature.
@@ -36,113 +34,98 @@ As shown in the figure above, there are some key concepts in the data model:
 * **Snapshot List** is the dumped frames based on a pre-defined resolution.
   It captures the aggregated changes of the environment between the dump points.
 
-
-.. raw:: html
-
-   <!-- is the abstraction of business properties for the
-   scenarios-specific resource holder. Different attributes of one node can be
-   different data types. Besides, for each attribute, you can also declare a `slot`
-   number (the default value is one). It can indicate the attribute values (e.g.
-   the three different container types in CIM scenario), the detailed categories
-   (e.g. the ten specific products in the Use Case below), etc. Also, the attribute
-   with a slot number can be seen as a fixed-sized array. -->
-
-
-
 Use Case
 --------
 
-
 * Below is the declaration of a retail frame, which includes warehouse and store nodes.
 
-.. code-block:: python
-
-   from maro.backends.frame import node, NodeAttribute, NodeBase, FrameNode, FrameBase
+  .. code-block:: python
 
-   TOTAL_PRODUCT_CATEGORIES = 10
-   TOTAL_STORES = 8
-   TOTAL_WAREHOUSES = 2
-   TOTAL_SNAPSHOT = 100
+    from maro.backends.frame import node, NodeAttribute, NodeBase, FrameNode, FrameBase
 
+    TOTAL_PRODUCT_CATEGORIES = 10
+    TOTAL_STORES = 8
+    TOTAL_WAREHOUSES = 2
+    TOTAL_SNAPSHOT = 100
 
-   @node("warehouse")
-   class Warehouse(NodeBase):
-       inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
-       shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
 
-       def __init__(self):
-           self._init_inventories = [100 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)]
-           self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES
+    @node("warehouse")
+    class Warehouse(NodeBase):
+        inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
+        shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
 
-       def reset(self):
-           self.inventories[:] = self._init_inventories
-           self.shortages[:] = self._init_shortages
+        def __init__(self):
+            self._init_inventories = [100 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)]
+            self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES
 
+        def reset(self):
+            self.inventories[:] = self._init_inventories
+            self.shortages[:] = self._init_shortages
 
-   @node("store")
-   class Store(NodeBase):
-       inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
-       shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
-       sales = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
 
-       def __init__(self):
-           self._init_inventories = [10 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)]
-           self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES
-           self._init_sales = [0] * TOTAL_PRODUCT_CATEGORIES
+    @node("store")
+    class Store(NodeBase):
+        inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
+        shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
+        sales = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES)
 
-       def reset(self):
-           self.inventories[:] = self._init_inventories
-           self.shortages[:] = self._init_shortages
-           self.sales[:] = self._init_sales
+        def __init__(self):
+            self._init_inventories = [10 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)]
+            self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES
+            self._init_sales = [0] * TOTAL_PRODUCT_CATEGORIES
 
+        def reset(self):
+            self.inventories[:] = self._init_inventories
+            self.shortages[:] = self._init_shortages
+            self.sales[:] = self._init_sales
 
-   class RetailFrame(FrameBase):
-       warehouses = FrameNode(Warehouse, TOTAL_WAREHOUSES)
-       stores = FrameNode(Store, TOTAL_STORES)
 
-       def __init__(self):
-           # If your actual frame number was more than the total snapshot number, the old snapshots would be rolling replaced.
-           super().__init__(enable_snapshot=True, total_snapshot=TOTAL_SNAPSHOT)
+    class RetailFrame(FrameBase):
+        warehouses = FrameNode(Warehouse, TOTAL_WAREHOUSES)
+        stores = FrameNode(Store, TOTAL_STORES)
 
+        def __init__(self):
+            # If the actual number of frames exceeds the total snapshot number, the oldest snapshots are overwritten in a rolling manner.
+            super().__init__(enable_snapshot=True, total_snapshot=TOTAL_SNAPSHOT)
 
 * The operations on the retail frame.
 
-.. code-block:: python
+  .. code-block:: python
 
-   retail_frame = RetailFrame()
+    retail_frame = RetailFrame()
 
-   # Fulfill the initialization values to the backend memory.
-   for store in retail_frame.stores:
-       store.reset()
+    # Fulfill the initialization values to the backend memory.
+    for store in retail_frame.stores:
+        store.reset()
 
-   # Fulfill the initialization values to the backend memory.
-   for warehouse in retail_frame.warehouses:
-       warehouse.reset()
+    # Fulfill the initialization values to the backend memory.
+    for warehouse in retail_frame.warehouses:
+        warehouse.reset()
 
-   # Take a snapshot of the first tick frame.
-   retail_frame.take_snapshot(0)
-   snapshot_list = retail_frame.snapshots
-   print(f"Max snapshot list capacity: {len(snapshot_list)}")
+    # Take a snapshot of the first tick frame.
+    retail_frame.take_snapshot(0)
+    snapshot_list = retail_frame.snapshots
+    print(f"Max snapshot list capacity: {len(snapshot_list)}")
 
-   # Query sales, inventory information of all stores at first tick, len(snapshot_list["store"]) equals to TOTAL_STORES.
-   all_stores_info = snapshot_list["store"][0::["sales", "inventories"]].reshape(TOTAL_STORES, -1)
-   print(f"All stores information at first tick (numpy array): {all_stores_info}")
+    # Query sales, inventory information of all stores at first tick, len(snapshot_list["store"]) equals to TOTAL_STORES.
+    all_stores_info = snapshot_list["store"][0::["sales", "inventories"]].reshape(TOTAL_STORES, -1)
+    print(f"All stores information at first tick (numpy array): {all_stores_info}")
 
-   # Query shortage information of first store at first tick.
-   first_store_shortage = snapshot_list["store"][0:0:"shortages"]
-   print(f"First store shortages at first tick (numpy array): {first_store_shortage}")
+    # Query shortage information of first store at first tick.
+    first_store_shortage = snapshot_list["store"][0:0:"shortages"]
+    print(f"First store shortages at first tick (numpy array): {first_store_shortage}")
 
-   # Query inventory information of all warehouses at first tick, len(snapshot_list["warehouse"]) equals to TOTAL_WAREHOUSES.
-   all_warehouses_info = snapshot_list["warehouse"][0::"inventories"].reshape(TOTAL_WAREHOUSES, -1)
-   print(f"All warehouses information at first tick (numpy array): {all_warehouses_info}")
+    # Query inventory information of all warehouses at first tick, len(snapshot_list["warehouse"]) equals to TOTAL_WAREHOUSES.
+    all_warehouses_info = snapshot_list["warehouse"][0::"inventories"].reshape(TOTAL_WAREHOUSES, -1)
+    print(f"All warehouses information at first tick (numpy array): {all_warehouses_info}")
 
-   # Add fake shortages to first store.
-   retail_frame.stores[0].shortages[:] = [i + 1 for i in range(TOTAL_PRODUCT_CATEGORIES)]
-   retail_frame.take_snapshot(1)
+    # Add fake shortages to first store.
+    retail_frame.stores[0].shortages[:] = [i + 1 for i in range(TOTAL_PRODUCT_CATEGORIES)]
+    retail_frame.take_snapshot(1)
 
-   # Query shortage information of first and second store at first and second tick.
-   store_shortage_history = snapshot_list["store"][[0, 1]: [0, 1]: "shortages"].reshape(2, -1)
-   print(f"First and second store shortage history at the first and second tick (numpy array): {store_shortage_history}")
+    # Query shortage information of first and second store at first and second tick.
+    store_shortage_history = snapshot_list["store"][[0, 1]: [0, 1]: "shortages"].reshape(2, -1)
+    print(f"First and second store shortage history at the first and second tick (numpy array): {store_shortage_history}")
 
 Supported Attribute Data Type
 -----------------------------
@@ -150,6 +133,7 @@ Supported Attribute Data Type
 All supported data types for the attribute of the node:
 
 .. list-table::
+   :widths: 25 25 60
    :header-rows: 1
 
    * - Attribute Data Type
@@ -171,29 +155,26 @@ All supported data types for the attribute of the node:
      - double
      - -1.7E308 .. 1.7E308
 
-
 Advanced Features
 -----------------
 
 For better data access, we also provide some advanced features, including:
 
-
 * **Attribute value change handler**\ : It is a hook function for the value change
   event on a specific attribute. The member function with the
   ``_on_{attribute_name}_changed`` naming pattern will be automatically invoked when
   the related attribute value changed. Below is the example code:
 
-.. code-block:: python
+  .. code-block:: python
 
-   from maro.backends.frame import node, NodeBase, NodeAttribute
+    from maro.backends.frame import node, NodeBase, NodeAttribute
 
-   @node("test_node")
-   class TestNode(NodeBase):
-       test_attribute = NodeAttribute("i")
-
-       def _on_test_attribute_changed(self, value: int):
-           pass
+    @node("test_node")
+    class TestNode(NodeBase):
+        test_attribute = NodeAttribute("i")
 
+        def _on_test_attribute_changed(self, value: int):
+            pass
 
 * **Snapshot list slicing**\ : It provides a slicing interface for querying
   temporal (frame), spatial (node), intra-node (attribute) information. Both a
@@ -201,39 +182,37 @@ For better data access, we also provide some advanced features, including:
   node(s), and attribute(s), while the empty means querying all. The return value
   is a flattened 1-dimension NumPy array, which aligns with the slicing order as below:
 
+  .. image:: ../images/simulator/snapshot_list_slicing.svg
+    :target: ../images/simulator/snapshot_list_slicing.svg
+    :alt: Snapshot List Slicing
 
-.. image:: ../images/simulator/snapshot_list_slicing.svg
-   :target: ../images/simulator/snapshot_list_slicing.svg
-   :alt: Snapshot List Slicing
-
-
-.. code-block:: python
+  .. code-block:: python
 
-   snapshot_list = env.snapshot_list
+    snapshot_list = env.snapshot_list
 
-   # Get max size of snapshots (in memory).
-   print(f"Max snapshot size: {len(snapshot_list)}")
+    # Get max size of snapshots (in memory).
+    print(f"Max snapshot size: {len(snapshot_list)}")
 
-   # Get snapshots of a specific node type.
-   test_nodes_snapshots = snapshot_list["test_nodes"]
+    # Get snapshots of a specific node type.
+    test_nodes_snapshots = snapshot_list["test_nodes"]
 
-   # Get node instance amount.
-   print(f"Number of test_nodes in the frame: {len(test_nodes_snapshots)}")
+    # Get the number of node instances.
+    print(f"Number of test_nodes in the frame: {len(test_nodes_snapshots)}")
 
-   # Query one attribute on all frames and nodes.
-   states = test_nodes_snapshots[::"int_attribute"]
+    # Query one attribute on all frames and nodes.
+    states = test_nodes_snapshots[::"int_attribute"]
 
-   # Query two attributes on all frames and nodes.
-   states = test_nodes_snapshots[::["int_attribute", "float_attribute"]]
+    # Query two attributes on all frames and nodes.
+    states = test_nodes_snapshots[::["int_attribute", "float_attribute"]]
 
-   # Query one attribute on all frame and the first node.
-   states = test_nodes_snapshots[:0:"int_attribute"]
+    # Query one attribute on all frames and the first node.
+    states = test_nodes_snapshots[:0:"int_attribute"]
 
-   # Query attribute by node index list.
-   states = test_nodes_snapshots[:[0, 1, 2]:"int_attribute"]
+    # Query attribute by node index list.
+    states = test_nodes_snapshots[:[0, 1, 2]:"int_attribute"]
 
-   # Query one attribute on the first frame and the first node.
-   states = test_nodes_snapshots[0:0:"int_attribute"]
+    # Query one attribute on the first frame and the first node.
+    states = test_nodes_snapshots[0:0:"int_attribute"]
 
-   # Query attribute by frame index list.
-   states = test_nodes_snapshots[[0, 1, 2]: 0: "int_attribute"]
+    # Query attribute by frame index list.
+    states = test_nodes_snapshots[[0, 1, 2]: 0: "int_attribute"]
diff --git a/docs/source/key_components/distributed_toolkit.rst b/docs/source/key_components/distributed_toolkit.rst
index a0df12d93..9f231de40 100644
--- a/docs/source/key_components/distributed_toolkit.rst
+++ b/docs/source/key_components/distributed_toolkit.rst
@@ -5,18 +5,15 @@ Distributed Toolkit
 MARO distributed toolkit provides a unified, fast, and infrastructure-independent
 interface to support RL distributed training.
 
-
 .. image:: ../images/distributed/overview.svg
    :target: ../images/distributed/overview.svg
    :alt: Overview
 
-
 As shown in the overall architecture diagram above, MARO distributed toolkit
 follows a message-passing pattern that the cooperation between different components
 is based on the messages sending and receiving. A typical master/worker distributed
 program usually contains the following steps:
 
-
 #. The master component will send tasks(w/ or w/o data) to the worker components;
 #. The worker components will finish the tasks in their local computing environments
    or the local devices;
@@ -30,22 +27,17 @@ Key Components
 
 There are two key components in the distributed toolkit:
 
-
 .. image:: ../images/distributed/key_components.svg
    :target: ../images/distributed/key_components.svg
    :alt: Key Components
 
-
-
-* 
-  **Communication**\ : It provides the general message passing interfaces, such as
+* **Communication**\ : It provides the general message passing interfaces, such as
   ``(i)send``\ , ``receive``\ , ``(i)broadcast``\ , ``(i)scatter``\ , etc. The communication
   component use a replaceable communication protocol driver to adopt different
   communication protocol stack (e.g. `TCP/IP <https://en.wikipedia.org/wiki/Internet_protocol_suite>`_\ ,
   `InfiniBand <https://en.wikipedia.org/wiki/InfiniBand#:~:text=InfiniBand%20(IB>`_\ %20is%20a%20computer,both%20among%20and%20within%20computers.)
   ). Check the `distributed communication <./communication.html>`_ to get more details.
 
-* 
-  **Orchestration**\ : It primarily provides a unified interface for cluster
+* **Orchestration**\ : It primarily provides a unified interface for cluster
   management and job management on different infrastructures. Check the
   `distributed orchestration <./orchestration.html>`_ to get more details.
diff --git a/docs/source/key_components/event_buffer.rst b/docs/source/key_components/event_buffer.rst
index 5188e6746..72a18debf 100644
--- a/docs/source/key_components/event_buffer.rst
+++ b/docs/source/key_components/event_buffer.rst
@@ -22,11 +22,10 @@ the FIFO rule. Currently, only a single-thread version event buffer is provided.
    # Execute events at a specific tick.
    executed_events = event_buffer.execute(tick)
 
-
 .. image:: ../images/simulator/event_buffer.svg
    :target: ../images/simulator/event_buffer.svg
    :alt: Event Buffer
-
+   :width: 700
 
 Event Category
 --------------
@@ -34,7 +33,6 @@ Event Category
 To simplify the implementation of the business logic, MARO provides two kinds of
 basic event types, which can be used to construct various event execution pattern:
 
-
 * **Atom event** is an event without any dependence. An atom event will be
   immediately popped out from the event buffer after execution.
 * **Cascade event** is a series of events with dependencies. An internal event
@@ -49,7 +47,6 @@ Event Format
 We provide a general-purpose event format for all the scenarios in MARO. A legal
 event generally contains the following properties:
 
-
 * **tick** (int): The execution tick of this event.
 * **event_type** (int): The type of this event. It is a customized field, the
   default value is 0 (PREDEFINE_EVENT_ACTION).
@@ -64,8 +61,6 @@ event generally contains the following properties:
 * **state** (EventState): The state of this event. Valid values include PENDING,
   EXECUTING and FINISHED. The figure below indicates the state changing of an event:
 
-
 .. image:: ../images/simulator/event_state.svg
    :target: ../images/simulator/event_state.svg
    :alt: Event State
-
diff --git a/docs/source/key_components/orchestration.rst b/docs/source/key_components/orchestration.rst
index 67668be97..4761437bb 100644
--- a/docs/source/key_components/orchestration.rst
+++ b/docs/source/key_components/orchestration.rst
@@ -10,11 +10,10 @@ are dockerized for easy deployment and resource allocation. It provides a unifie
 abstraction/interface for different orchestration framework
 (e.g. `Grass <#grass>`_\ , `Kubernetes <#kubernetes>`_\ ).
 
-
 .. image:: ../images/distributed/orch_overview.svg
    :target: ../images/distributed/orch_overview.svg
    :alt: Orchestration Overview
-
+   :width: 600
 
 Grass
 -----
@@ -24,14 +23,12 @@ confidently applied to small/middle size cluster (< 200 nodes). The design goal
 of Grass is to speed up the distributed algorithm prototype development.
 It has the following advantages:
 
-
 * Fast deployment in a small cluster.
 * Fine-grained resource management.
 * Lightweight, no other dependencies are required.
 
 In the Grass mode:
 
-
 * All VMs will be deployed in the same virtual network for a faster, more stable
   connection and larger bandwidth. Please note that the maximum number of VMs is
   limited by the `available dedicated IP addresses <https://docs.microsoft.com/en-us/azure/virtual-network/virtual-networks-faq#what-address-ranges-can-i-use-in-my-vnets>`_.
@@ -43,11 +40,10 @@ In the Grass mode:
 Check `Grass Cluster Provisioning on Azure <../installation/grass_cluster_provisioning_on_azure.html>`_
 to get how to use it.
 
-
 .. image:: ../images/distributed/orch_grass.svg
    :target: ../images/distributed/orch_grass.svg
    :alt: Orchestration Grass Mode in Azure
-
+   :width: 600
 
 Kubernetes
 ----------
@@ -56,13 +52,11 @@ MARO also supports Kubernetes (k8s) as an orchestration option.
 With this widely used framework, you can easily build up your training cluster
 with hundreds and thousands of nodes. It has the following advantages:
 
-
 * Higher durability.
 * Better scalability.
 
 In the Kubernetes mode:
 
-
 * The dockerized job component runs in Kubernetes pod, and each pod only hosts
   one component.
 * All Kubernetes pods are registered into the same virtual network using
@@ -71,8 +65,7 @@ In the Kubernetes mode:
 Check `K8S Cluster Provisioning on Azure <../installation/k8s_cluster_provisioning_on_azure.html>`_
 to get how to use it.
 
-
 .. image:: ../images/distributed/orch_k8s.svg
    :target: ../images/distributed/orch_k8s.svg
    :alt: Orchestration K8S Mode in Azure
-
+   :width: 600
diff --git a/docs/source/key_components/rl_toolkit.rst b/docs/source/key_components/rl_toolkit.rst
index 635aa9597..857023da8 100644
--- a/docs/source/key_components/rl_toolkit.rst
+++ b/docs/source/key_components/rl_toolkit.rst
@@ -12,61 +12,57 @@ scenarios in a scalable way. The main abstractions include
 Learner and Actor
 -----------------
 
-
 .. image:: ../images/rl/overview.svg
    :target: ../images/rl/overview.svg
    :alt: RL Overview
 
-
-
 * **Learner** is the abstraction of the learnable policy. It is responsible for
   learning a qualified policy to improve the business optimized object.
 
-.. code-block:: python
+  .. code-block:: python
 
-   # Train function of learner.
-   def train(self, total_episodes):
-       for current_ep in range(total_episodes):
-           models = self._trainable_agents.get_models()
-           performance, experiences = self._actor.roll_out(models=models,
-                                                           epsilons=self._trainable_agents.explorer.epsilons,
-                                                           seed=self._seed)
-
-           self._trainable_agents.store_experiences(experiences)
-           self._trainable_agents.train()
-           self._trainable_agents.update_epsilon(performance)
+    # Train function of learner.
+    def train(self, total_episodes):
+        for current_ep in range(total_episodes):
+            models = self._trainable_agents.get_models()
+            performance, experiences = self._actor.roll_out(models=models,
+                                                            epsilons=self._trainable_agents.explorer.epsilons,
+                                                            seed=self._seed)
 
+            self._trainable_agents.store_experiences(experiences)
+            self._trainable_agents.train()
+            self._trainable_agents.update_epsilon(performance)
 
 * **Actor** is the abstraction of experience collection. It is responsible for
   interacting with the environment and collecting experience. The experiences
   collected during interaction will be used for the training of the learners.
 
-.. code-block:: python
+  .. code-block:: python
 
-   # Rollout function of actor.
-   def roll_out(self, models=None, epsilons=None, seed: int = None):
-       self._env.set_seed(seed)
+    # Rollout function of actor.
+    def roll_out(self, models=None, epsilons=None, seed: int = None):
+        self._env.set_seed(seed)
 
-       # Assign epsilon
-       if epsilons is not None:
-           self._inference_agents.explorer.epsilons = epsilons
+        # Assign epsilon
+        if epsilons is not None:
+            self._inference_agents.explorer.epsilons = epsilons
 
-       # Load models
-       if models is not None:
-           self._inference_agents.load_models(models)
+        # Load models
+        if models is not None:
+            self._inference_agents.load_models(models)
 
-       metrics, decision_event, is_done = self._env.step(None)
+        metrics, decision_event, is_done = self._env.step(None)
 
-       while not is_done:
-           action = self._inference_agents.choose_action(decision_event, self._env.snapshot_list)
-           metrics, decision_event, is_done = self._env.step(action)
-           self._inference_agents.on_env_feedback(metrics)
+        while not is_done:
+            action = self._inference_agents.choose_action(decision_event, self._env.snapshot_list)
+            metrics, decision_event, is_done = self._env.step(action)
+            self._inference_agents.on_env_feedback(metrics)
 
-       experiences = self._inference_agents.post_process(self._env.snapshot_list)
-       performance = self._env.metrics
-       self._env.reset()
+        experiences = self._inference_agents.post_process(self._env.snapshot_list)
+        performance = self._env.metrics
+        self._env.reset()
 
-       return {'local': performance}, experiences
+        return {'local': performance}, experiences
 
 Agent Manager
 -------------
@@ -95,12 +91,10 @@ Furthermore, to well serve the distributed algorithm (scalable), the agent
 manager provides two kinds of working modes, which can be applied in different
 distributed components, such as inference mode in actor, training mode in learner.
 
-
 .. image:: ../images/rl/agent_manager.svg
    :target: ../images/rl/agent_manager.svg
    :alt: Agent Manager
-
-
+   :width: 750
 
 * In **inference mode**\ , the agent manager is responsible to access and shape
   the environment state for the related agent, convert the model action to an
@@ -119,12 +113,10 @@ experience storage, sampling strategies, and training strategies. Since all kind
 of scenario-specific stuff will be handled by the agent manager, the agent is
 scenario agnostic.
 
-
 .. image:: ../images/rl/agent.svg
    :target: ../images/rl/agent.svg
    :alt: Agent
 
-
 .. code-block:: python
 
    class Agent(object):
@@ -148,7 +140,6 @@ scenario agnostic.
 
 Under the management of the agent manager:
 
-
 * In **inference mode**\ , given the shaped model state as input, the agent will
   output a model action (then the agent manager will shape it into an executable
   environment action). Also, at the end of each episode, the agent will fill the
@@ -164,12 +155,10 @@ problem. The model architecture, loss function, optimizer, and internal model
 update strategy are designed and parameterized here. In this module, two
 predefined interfaces must be implemented:
 
-
 .. image:: ../images/rl/algorithm.svg
    :target: ../images/rl/algorithm.svg
    :alt: Algorithm
-
-
+   :width: 650
 
 * ``choose_action`` is used to make a decision based on a provided model state.
 * ``train_on_batch`` is used to trigger training and the policy update from external.
@@ -201,7 +190,6 @@ MARO uses shapers to isolate business-related details and the algorithm modeling
 It provides a clean interactive surface for RL agent(s). The followings are the
 three usually used shapers in RL formulations:
 
-
 * **State shaper**\ : Given a decision event, the state shaper will extract relevant
   temporal-spatial information from the environment (snapshot list) for the decision
   agent. The output usually follows a format that can be directly inputted to the
@@ -229,27 +217,3 @@ three usually used shapers in RL formulations:
   experiences. By default, we provide a ``k-step return`` experience shaper for
   general usage, but for better performance, you need to carefully design this part
   according to your scenario and needs.
-
-
-.. raw:: html
-
-   <!-- At the end of each episode, the experience shaper will
-   shape the runtime transitions the agent collected to formatted learnable experiences.
-   The runtime transitions record the agent's interaction with the environment and
-   contain scenario-specific information (e.g. event information, business metrics)
-   that should be isolated from the RL algorithm. Therefore, the experience shaper
-   transforms it into *RL-friendly* `experiences` with `state`, `action`, and `reward`, etc. -->
-
-
-
-
-.. raw:: html
-
-   <!-- At the end of each episode, the experience shaper will
-   convert the agent's interaction trajectory to formatted learnable experiences.
-   The interaction trajectory is constructed by a continuous runtime transitions
-   which usually record the agent's interaction with the environment and contain
-   extra scenario-specific information (e.g. event information, business metrics).
-   A formatted learnable experience usually contains the field of `state`, `action`,
-   and `reward`, etc. -->
-
diff --git a/docs/source/key_components/simulation_toolkit.rst b/docs/source/key_components/simulation_toolkit.rst
index b27ed18d4..59f9022f4 100644
--- a/docs/source/key_components/simulation_toolkit.rst
+++ b/docs/source/key_components/simulation_toolkit.rst
@@ -15,7 +15,6 @@ Overview
 
 The key features of MARO simulation toolkit:
 
-
 * Event-driven, which is friendly to business logging data, and more in line
   with real-world scenarios;
 * High execution performance;
@@ -39,11 +38,10 @@ pluggable `Business Engine <./business_engine.html>`_ based on the
 `Event Buffer <./event_buffer.html>`_ supporting. The following figure illustrates
 the overall architecture of the MARO simulation system.
 
-
 .. image:: ../images/simulator/overview.svg
    :target: ../images/simulator/overview.svg
    :alt: Simulation System Overall
-
+   :width: 600
 
 Environment
 -----------
@@ -61,12 +59,10 @@ and how to conveniently query data on the levels of spatial, temporal, and
 intra-node (resource holder). The following shows a typical environment interaction
 workflow and code snippet.
 
-
 .. image:: ../images/simulator/interaction_workflow.svg
    :target: ../images/simulator/interaction_workflow.svg
    :alt: Agent(s)/Environment Interaction Workflow
 
-
 .. code-block:: python
 
    from maro.simulator import Env
diff --git a/docs/source/scenarios/citi_bike.rst b/docs/source/scenarios/citi_bike.rst
index 5b67d7a41..8b09db207 100644
--- a/docs/source/scenarios/citi_bike.rst
+++ b/docs/source/scenarios/citi_bike.rst
@@ -15,7 +15,6 @@ one-way bike trips based on the public trip data from
    to work or school, run errands, get to appointments or social engagements, and
    more.
 
-
 Since the demand for bikes and empty docks is dynamically changed during a day,
 and the bike flow between two stations are not equal in a same period, some
 stations suffer from severe bike shortages, while some have too much bikes and
@@ -32,7 +31,6 @@ Resource Flow
 In this scenario, the **bike** is the central resource. Two events will trigger
 the movement of the bike:
 
-
 * The first one is the trip requirement, which may cause the bike transfer from
   the source station to the destination station;
 * The second one is the repositioning operation. It is used to rebalance the bike
@@ -44,7 +42,6 @@ Bike Trip
 In the citi bike scenario in MARO, the trip generation and the corresponding bike
 flow is defined as follows:
 
-
 * Given a fixed time interval, for each specific source-destination station pair,
   a trip requirement will arise according to a predefined distribution or the real
   trip data. It depends on the chosen topology.
@@ -72,7 +69,6 @@ in the topology and can be customized based on different requirements.
 The target station candidates of the ``Supply`` and ``Demand`` events are selected
 by a predefined multi-layer filter in this scenario:
 
-
 #. The distance between the caller station and the neighboring stations will be
    used to filter and get a specific number of stations;
 #. The number of available bikes at each candidate station will be used to further
@@ -118,12 +114,10 @@ In toy topology, the generation of the trip requirements follows a stable patter
 as introduced above. The detailed trip demand pattern are listed as below. And we
 hope that these toy topologies can provide you with some insights about this scenario.
 
-
 .. image:: ../images/scenario/citibike.toys.svg
    :target: ../images/scenario/citibike.toys.svg
    :alt: Citi Bike toy topologies
 
-
 **toy.3s_4t**\ : There are three stations in this topology. Every two minutes,
 there will be a trip requirement from S2 to S3 and a trip requirement from S3 to
 S2. At the same time, every two minutes, the system will generate trip requirement
@@ -192,6 +186,7 @@ No Repositioning
      - 9,231
      - 0
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -249,6 +244,7 @@ No Repositioning
      - 15,876
      - 0
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -306,6 +302,7 @@ No Repositioning
      - 14,205
      - 0
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -339,7 +336,6 @@ No Repositioning
      - 7,608
      - 0
 
-
 Random Repositioning
 ~~~~~~~~~~~~~~~~~~~~
 
@@ -363,6 +359,7 @@ Random Repositioning
      - 7,513 :math:`\pm`    40
      - 3,242 :math:`\pm`    71
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -420,6 +417,7 @@ Random Repositioning
      - 26,067 :math:`\pm`   234
      - 457,173 :math:`\pm`  6,444
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -477,6 +475,7 @@ Random Repositioning
      - 20,119 :math:`\pm`   110
      - 437,311 :math:`\pm`  5,936
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -510,7 +509,6 @@ Random Repositioning
      - 11,741 :math:`\pm`   170
      - 349,932 :math:`\pm`  4,375
 
-
 Quick Start
 -----------
 
@@ -545,7 +543,6 @@ The data ``generate`` command is used to automatically download and build the sp
 predefined scenario and topology data files for the simulation. Currently, there
 are three arguments for the data ``generate`` command:
 
-
 * ``-s``\ : required, used to specify the predefined scenario. Valid scenarios are
   listed in the result of `environment list command <#environment-list-command>`_.
 * ``-t``\ : required, used to specify the predefined topology. Valid topologies are
@@ -580,7 +577,6 @@ The data ``convert`` command is used to convert the CSV data files to binary dat
 files that the simulator needs. Currently, there are three arguments for the data
 ``convert`` command:
 
-
 * ``--meta``\ : required, used to specify the path of the meta file. The source
   columns that to be converted and the data type of each columns should be
   specified in the meta file.
@@ -608,7 +604,6 @@ Once the environment need the agent's response to reposition bikes, it will
 throw an ``DecisionEvent``. In the scenario of Citi Bike, the information of each
 ``DecisionEvent`` is listed as below:
 
-
 * **station_idx** (int): The id of the station/agent that needs to respond to the
   environment.
 * **tick** (int): The corresponding tick.
@@ -641,7 +636,6 @@ Action
 Once we get a ``DecisionEvent`` from the environment, we should respond with an
 ``Action``. Valid ``Action`` could be:
 
-
 * ``None``\ , which means do nothing.
 * A valid ``Action`` instance, including:
 
@@ -748,5 +742,5 @@ random mode, we hope this could help you learn how to use the environment interf
        print(f"ep: {ep}, environment metrics: {env.metrics}")
        env.reset()
 
-Jump to `this notebook <https://github.com/microsoft/maro/tree/master/notebooks/bike_repositioning>`_
+Jump to `this notebook <https://github.com/microsoft/maro/blob/master/notebooks/bike_repositioning/interact_with_environment.ipynb>`_
 for a quick experience.
diff --git a/docs/source/scenarios/container_inventory_management.rst b/docs/source/scenarios/container_inventory_management.rst
index 3d2927ec5..b970fb943 100644
--- a/docs/source/scenarios/container_inventory_management.rst
+++ b/docs/source/scenarios/container_inventory_management.rst
@@ -17,18 +17,15 @@ Resource Flow
 In this scenario, the **container** is the central resource. Two events will
 trigger the movement of the container:
 
-
 * The first one is the order, which will lead to the transportation of goods from
   the source port to the destination port.
 * The second one is the repositioning operation. It is used to rebalance the
   container distribution worldwide.
 
-
 .. image:: ../images/scenario/cim.container_flow.svg
    :target: ../images/scenario/cim.container_flow.svg
    :alt: The Life Cycle of the Container
 
-
 Order
 ^^^^^
 
@@ -43,7 +40,6 @@ customized based on different requirements.
 An order will trigger a life cycle of a container, as shown in the figure above,
 a life cycle is defined as follows:
 
-
 * Once an order is generated, an empty container of the corresponding export port
   (source port) will be released to the shipper.
 * The shipper will fill the container with cargo which turns it into a laden and
@@ -63,7 +59,6 @@ As mentioned above, to rebalance the container distribution, the agent in each
 port will decide how to reposition the empty containers every time a vessel
 arrives at the port. The decision consists of two parts:
 
-
 * Whether to take a ``discharge`` operation or a ``load`` operation;
 * The number of containers to discharge/load.
 
@@ -95,12 +90,10 @@ problem.
 Toy Topologies
 ^^^^^^^^^^^^^^
 
-
 .. image:: ../images/scenario/cim.toys.svg
    :target: ../images/scenario/cim.toys.svg
    :alt: CIM toy topologies
 
-
 *(In these topologies, the solid lines indicate the service route (voyage) among
 ports, while the dashed lines indicate the container flow triggered by orders.)*
 
@@ -143,12 +136,10 @@ ports no longer have a simple supply/demand feature. The cooperation among ports
 is much more complex and it is difficult to find an efficient repositioning policy
 manually.
 
-
 .. image:: ../images/scenario/cim.global_trade.svg
    :target: ../images/scenario/cim.global_trade.svg
    :alt: global_trade.22p
 
-
 *(To make it clearer, the figure above only shows the service routes among ports.)*
 
 Naive Baseline
@@ -205,6 +196,7 @@ No Repositioning
      - 2,191,716
      - 0
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -250,6 +242,7 @@ No Repositioning
      - 2,141,716
      - 0
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -295,6 +288,7 @@ No Repositioning
      - 2,088,716
      - 0
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -340,7 +334,6 @@ No Repositioning
      - 1,161,714
      - 0
 
-
 Random Repositioning
 ~~~~~~~~~~~~~~~~~~~~
 
@@ -388,6 +381,7 @@ Random Repositioning
      - 1,603,063 :math:`\pm` 109,149
      - 2,518,920 :math:`\pm` 193,200
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -433,6 +427,7 @@ Random Repositioning
      - 1,763,950 :math:`\pm`  73,935
      - 1,858,420 :math:`\pm`  60,680
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -478,6 +473,7 @@ Random Repositioning
      - 1,761,283 :math:`\pm` 22,338
      - 1,336,540 :math:`\pm`  30,020
 
+|
 
 .. list-table::
    :header-rows: 1
@@ -523,7 +519,6 @@ Random Repositioning
      - 1,154,184 :math:`\pm`  7,043
      - 270,960 :math:`\pm`   2,240
 
-
 Quick Start
 -----------
 
@@ -549,7 +544,6 @@ Once the environment need the agent's response to promote the simulation, it wil
 throw an ``DecisionEvent``. In the scenario of CIM, the information of each
 ``DecisionEvent`` is listed as below:
 
-
 * **tick** (int): The corresponding tick.
 * **port_idx** (int): The id of the port/agent that needs to respond to the
   environment.
@@ -572,7 +566,6 @@ Action
 Once we get a ``DecisionEvent`` from the environment, we should respond with an
 ``Action``. Valid ``Action`` could be:
 
-
 * ``None``\ , which means do nothing.
 * A valid ``Action`` instance, including:
 
@@ -646,5 +639,5 @@ random mode, we hope this could help you learn how to use the environment interf
        print(f"ep: {ep}, environment metrics: {env.metrics}")
        env.reset()
 
-Jump to `this notebook <https://github.com/microsoft/maro/tree/master/notebooks/container_inventory_management>`_
+Jump to `this notebook <https://github.com/microsoft/maro/blob/master/notebooks/container_inventory_management/interact_with_environment.ipynb>`_
 for a quick experience.