From 38c0bc2dd02e0aef4b6f9ae64319c818dbf78ecc Mon Sep 17 00:00:00 2001 From: Jinyu-W <53509467+Jinyu-W@users.noreply.github.com> Date: Wed, 30 Sep 2020 10:58:46 +0800 Subject: [PATCH] doc refined (#122) * doc refined * tailing space removed Co-authored-by: Jinyu Wang --- docs/source/conf.py | 2 +- docs/source/index.rst | 2 +- .../grass_cluster_provisioning_on_azure.rst | 88 ++++---- .../k8s_cluster_provisioning_on_azure.rst | 77 +++---- docs/source/installation/pip_install.rst | 56 ++--- docs/source/installation/playground.rst | 7 +- .../source/key_components/business_engine.rst | 3 - docs/source/key_components/communication.rst | 37 ++-- docs/source/key_components/data_model.rst | 209 ++++++++---------- .../key_components/distributed_toolkit.rst | 12 +- docs/source/key_components/event_buffer.rst | 7 +- docs/source/key_components/orchestration.rst | 13 +- docs/source/key_components/rl_toolkit.rst | 100 +++------ .../key_components/simulation_toolkit.rst | 6 +- docs/source/scenarios/citi_bike.rst | 20 +- .../container_inventory_management.rst | 21 +- 16 files changed, 256 insertions(+), 404 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 751c0a194..29ea9cc92 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,7 +40,7 @@ "sphinx.ext.coverage", "sphinx.ext.napoleon", "sphinx.ext.viewcode", - "sphinx_markdown_tables" + "sphinx_markdown_tables", ] napoleon_google_docstring = True diff --git a/docs/source/index.rst b/docs/source/index.rst index 261a14836..9582d8d12 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,7 +10,7 @@ Multi-Agent Resource Optimization (MARO) platform is an instance of Reinforcemen learning as a Service (RaaS) for real-world resource optimization. It can be applied to many important industrial domains, such as container inventory management in logistics, bike repositioning in transportation, virtual machine -provisioning in data centers, and asset management in finance. 
Besides +provisioning in data centers, and asset management in finance. Besides `Reinforcement Learning `_ (RL), it also supports other planning/decision mechanisms, such as `Operations Research `_. diff --git a/docs/source/installation/grass_cluster_provisioning_on_azure.rst b/docs/source/installation/grass_cluster_provisioning_on_azure.rst index ac895b05f..39b05ff40 100644 --- a/docs/source/installation/grass_cluster_provisioning_on_azure.rst +++ b/docs/source/installation/grass_cluster_provisioning_on_azure.rst @@ -9,7 +9,6 @@ on Azure and run your training job in a distributed environment. Prerequisites ------------- - * `Install the Azure CLI and login `_ * `Install docker `_ and `Configure docker to make sure it can be managed as a non-root user `_ @@ -17,103 +16,92 @@ Prerequisites Cluster Management ------------------ - * Create a cluster with a `deployment <#grass-azure-create>`_ -.. code-block:: sh - - # Create a grass cluster with a grass-create deployment - maro grass create ./grass-azure-create.yml + .. code-block:: sh + # Create a grass cluster with a grass-create deployment + maro grass create ./grass-azure-create.yml * Scale the cluster -.. code-block:: sh + .. code-block:: sh - # Scale nodes with 'Standard_D4s_v3' specification to 2 - maro grass node scale my_grass_cluster Standard_D4s_v3 2 - -Check `VM Size `_ -to see more node specifications. + # Scale nodes with 'Standard_D4s_v3' specification to 2 + maro grass node scale my_grass_cluster Standard_D4s_v3 2 + Check `VM Size `_ + to see more node specifications. * Delete the cluster -.. code-block:: sh - - # Delete a grass cluster - maro grass delete my_grass_cluster + .. code-block:: sh + # Delete a grass cluster + maro grass delete my_grass_cluster * Start/stop nodes to save costs -.. code-block:: sh + .. 
code-block:: sh - # Start 2 nodes with 'Standard_D4s_v3' specification - maro grass node start my_grass_cluster Standard_D4s_v3 2 + # Start 2 nodes with 'Standard_D4s_v3' specification + maro grass node start my_grass_cluster Standard_D4s_v3 2 - # Stop 2 nodes with 'Standard_D4s_v3' specification - maro grass node stop my_grass_cluster Standard_D4s_v3 2 + # Stop 2 nodes with 'Standard_D4s_v3' specification + maro grass node stop my_grass_cluster Standard_D4s_v3 2 Run Job ------- - * Push your training image -.. code-block:: sh - - # Push image 'my_image' to the cluster - maro grass image push my_grass_cluster --image-name my_image + .. code-block:: sh + # Push image 'my_image' to the cluster + maro grass image push my_grass_cluster --image-name my_image * Push your training data -.. code-block:: sh - - # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster - # You can then assign your mapping location in the start-job deployment - maro grass data push my_grass_cluster ./my_training_data/* /my_training_data + .. code-block:: sh + # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster + # You can then assign your mapping location in the start-job deployment + maro grass data push my_grass_cluster ./my_training_data/* /my_training_data * Start a training job with a `deployment <#grass-start-job>`_ -.. code-block:: sh - - # Start a training job with a start-job deployment - maro grass job start my_grass_cluster ./grass-start-job.yml + .. code-block:: sh + # Start a training job with a start-job deployment + maro grass job start my_grass_cluster ./grass-start-job.yml * Or, schedule batch jobs with a `deployment <#grass-start-schedule>`_ -.. code-block:: sh - - # Start a training schedule with a start-schedule deployment - maro grass schedule start my_grass_cluster ./grass-start-schedule.yml + .. 
code-block:: sh + # Start a training schedule with a start-schedule deployment + maro grass schedule start my_grass_cluster ./grass-start-schedule.yml * Get the logs of the job -.. code-block:: sh - - # Get the logs of the job - maro grass job logs my_grass_cluster my_job_1 + .. code-block:: sh + # Get the logs of the job + maro grass job logs my_grass_cluster my_job_1 * List the current status of the job -.. code-block:: sh - - # List the current status of the job - maro grass job list my_grass_cluster + .. code-block:: sh + # List the current status of the job + maro grass job list my_grass_cluster * Stop a training job -.. code-block:: sh + .. code-block:: sh - # Stop a training job - maro grass job stop my_job_1 + # Stop a training job + maro grass job stop my_job_1 Sample Deployments ------------------ diff --git a/docs/source/installation/k8s_cluster_provisioning_on_azure.rst b/docs/source/installation/k8s_cluster_provisioning_on_azure.rst index d3182e047..26dbdd664 100644 --- a/docs/source/installation/k8s_cluster_provisioning_on_azure.rst +++ b/docs/source/installation/k8s_cluster_provisioning_on_azure.rst @@ -9,7 +9,6 @@ on Azure and run your training job in a distributed environment. Prerequisites ------------- - * `Install the Azure CLI and login `_ * `Install and set up kubectl `_ * `Install docker `_ and @@ -18,92 +17,82 @@ Prerequisites Cluster Management ------------------ - * Create a cluster with a `deployment <#k8s-azure-create>`_ -.. code-block:: sh - - # Create a k8s cluster - maro k8s create ./k8s-azure-create.yml + .. code-block:: sh + # Create a k8s cluster + maro k8s create ./k8s-azure-create.yml * Scale the cluster -.. code-block:: sh + .. code-block:: sh - # Scale nodes with 'Standard_D4s_v3' specification to 2 - maro k8s node scale my_k8s_cluster Standard_D4s_v3 2 - -Check `VM Size `_ -to see more node specifications. 
+ # Scale nodes with 'Standard_D4s_v3' specification to 2 + maro k8s node scale my_k8s_cluster Standard_D4s_v3 2 + Check `VM Size `_ + to see more node specifications. * Delete the cluster -.. code-block:: sh + .. code-block:: sh - # Delete a k8s cluster - maro k8s delete my_k8s_cluster + # Delete a k8s cluster + maro k8s delete my_k8s_cluster Run Job ------- - * Push your training image -.. code-block:: sh - - # Push image 'my_image' to the cluster - maro k8s image push my_k8s_cluster --image-name my_image + .. code-block:: sh + # Push image 'my_image' to the cluster + maro k8s image push my_k8s_cluster --image-name my_image * Push your training data -.. code-block:: sh - - # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster - # You can then assign your mapping location in the start-job deployment - maro k8s data push my_k8s_cluster ./my_training_data/* /my_training_data + .. code-block:: sh + # Push data under './my_training_data' to a relative path '/my_training_data' in the cluster + # You can then assign your mapping location in the start-job deployment + maro k8s data push my_k8s_cluster ./my_training_data/* /my_training_data * Start a training job with a `deployment <#k8s-start-job>`_ -.. code-block:: sh - - # Start a training job with a start-job deployment - maro k8s job start my_k8s_cluster ./k8s-start-job.yml + .. code-block:: sh + # Start a training job with a start-job deployment + maro k8s job start my_k8s_cluster ./k8s-start-job.yml * Or, schedule batch jobs with a `deployment <#k8s-start-schedule>`_ -.. code-block:: sh - - # Start a training schedule with a start-schedule deployment - maro k8s schedule start my_k8s123_cluster ./k8s-start-schedule.yml + .. code-block:: sh + # Start a training schedule with a start-schedule deployment + maro k8s schedule start my_k8s_cluster ./k8s-start-schedule.yml * Get the logs of the job -.. 
code-block:: sh - - # Logs will be exported to current directory - maro k8s job logs my_k8s_cluster my_job_1 + .. code-block:: sh + # Logs will be exported to current directory + maro k8s job logs my_k8s_cluster my_job_1 * List the current status of the job -.. code-block:: sh - - # List current status of jobs - maro k8s job list my_k8s_cluster my_job_1 + .. code-block:: sh + # List current status of jobs + maro k8s job list my_k8s_cluster my_job_1 * Stop a training job -.. code-block:: sh + .. code-block:: sh - # Stop a training job - maro k8s job stop my_k8s_cluster my_job_1 + # Stop a training job + maro k8s job stop my_k8s_cluster my_job_1 Sample Deployments ------------------ diff --git a/docs/source/installation/pip_install.rst b/docs/source/installation/pip_install.rst index 6796a9e16..3616a9e86 100644 --- a/docs/source/installation/pip_install.rst +++ b/docs/source/installation/pip_install.rst @@ -5,31 +5,25 @@ Package Install MARO from `PyPI `_ ---------------------------------------------------------------------- - -* - Max OS / Linux +* Mac OS / Linux .. code-block:: sh - pip install pymaro + pip install pymaro -* - Windows +* Windows - .. code-block:: powershell + .. code-block:: - # Install torch first, if you don't have one. - pip install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html + # Install torch first, if you don't have one. + pip install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html - pip install pymaro + pip install pymaro Install MARO from Source (\ `Editable Mode `_\ ) ------------------------------------------------------------------------------------------------------------------------ - -* - Prerequisites - +* Prerequisites * `Python >= 3.6, < 3.8 `_ * C++ Compiler @@ -37,12 +31,9 @@ Install MARO from Source (\ `Editable Mode `_ -* - Enable Virtual Environment - +* Enable Virtual Environment - * - Mac OS / Linux + * Mac OS / Linux .. 
code-block:: sh @@ -50,31 +41,26 @@ Install MARO from Source (\ `Editable Mode http://127.0.0.1:40011 docker run -p 40009:40009 -p 40010:40010 -p 40011:40011 maro/playground:cpu -* - Windows +* Windows .. code-block:: diff --git a/docs/source/key_components/business_engine.rst b/docs/source/key_components/business_engine.rst index f68e2f6b0..7145fb961 100644 --- a/docs/source/key_components/business_engine.rst +++ b/docs/source/key_components/business_engine.rst @@ -8,7 +8,6 @@ engines should be bind to it. The business engine is responsible for defining: - * **Business instance**. Generally, the business instances are the resource holders in the business logic. For example: @@ -47,12 +46,10 @@ driven by these business events. In short, with the uniformed business engine interface, the simulation of different business scenarios is only based on the pluggable business engine (scenario-specific). - .. image:: ../images/simulator/business_engine.svg :target: ../images/simulator/business_engine.svg :alt: Business Engine - Generally, the business time series data is read from the historical log or generated by a data generation model. Currently, for topologies in Citi Bike scenario, data processing is needed before starting the simulation. You can find diff --git a/docs/source/key_components/communication.rst b/docs/source/key_components/communication.rst index 7787f2332..fa6926af1 100644 --- a/docs/source/key_components/communication.rst +++ b/docs/source/key_components/communication.rst @@ -16,12 +16,10 @@ the underlying driver is pluggable based on the real requirements. Currently, we use `ZeroMQ `_ as the default choice. Proxy also provides support for peer discovering based on `Redis `_. - .. image:: ../images/distributed/proxy.svg :target: ../images/distributed/proxy.svg :alt: Proxy - Message ^^^^^^^ @@ -29,7 +27,6 @@ Message is designed for general purpose, it is used to package the communication content between components. 
The main attributes of a message instance include: - * ``tag``\ : A customized attribute, it can be used to implement the auto-dispatching logic with a `conditional event register table <#conditional-event-register-table>`_. * ``source``\ : The alias of the message sender. @@ -53,7 +50,6 @@ Session Message We provide two kinds of predefined session types for common distributed scenarios: - * **Task Session**\ : It is used to describe a computing task sent from master to worker. Three stages are included: @@ -92,7 +88,6 @@ both blocking and non-blocking cases. These primitives are decoupled from the underlying implementation of the communication driver (protocol). The main primitives are listed below: - * ``send``\ : Unicast. It is a blocking, one-to-one sending mode. It will watch and collect the reply message from the remote peer. * ``isend``\ : The non-blocking version of the ``send``. @@ -119,18 +114,15 @@ By registering the ``conditional event`` and related ``handler function`` to the register table, the handler function will be automatically executed with the received messages when the event conditions are met. - .. image:: ../images/distributed/register_table.register.svg :target: ../images/distributed/register_table.register.svg :alt: Register Table - ``Conditional event`` is used to declare the required message group for auto-triggering the related handler function. The unit event is the minimal component in the conditional event, it follows a three-stage format: `source`:`tag`:\ ``amount``. - * ``source``\ : It is used to declare the required message source. The longest-prefix matching is supported. @@ -145,45 +137,42 @@ it follows a three-stage format: `source`:`tag`:\ ``amount``. * ``%`` is used to represent the relative percentages, such as 60%, 10%, etc. -.. code-block:: python + .. 
code-block:: python - unit_event_abs = "worker:update:10" + unit_event_abs = "worker:update:10" - unit_event_rel = "worker:update:60%" + unit_event_rel = "worker:update:60%" To support more complex business logic, we provide two operations: ``AND`` and ``OR`` to combine unit events up: - * ``AND``\ : Valid for multiple unit events and combined unit events. The combined event condition is met if all the conditions of the sub-events are met. * ``OR``\ : Valid for multiple unit events and combined unit events. The combined event condition is met if any sub-event meets the condition. -.. code-block:: python + .. code-block:: python - combined_event_and = ("worker_01:update:2", - "worker_02:update:3", - "AND") + combined_event_and = ("worker_01:update:2", + "worker_02:update:3", + "AND") - combined_event_or = ("worker_03:update:1", - "worker_04:update:5", - "OR") + combined_event_or = ("worker_03:update:1", + "worker_04:update:5", + "OR") - combined_event_mix = (("worker_01:update:2", "worker_02:update:3", "AND"), - "worker_03:update:1", - "OR") + combined_event_mix = (("worker_01:update:2", "worker_02:update:3", "AND"), + "worker_03:update:1", + "OR") ``Handler function`` is a user-defined callback function that is bind to a specific conditional event. When the condition of the event is met, the related messages will be sent to the handler function for its execution. - .. image:: ../images/distributed/register_table.trigger.svg :target: ../images/distributed/register_table.trigger.svg :alt: Register Table - .. code-block:: python # A common handler function signature diff --git a/docs/source/key_components/data_model.rst b/docs/source/key_components/data_model.rst index 3b96878e9..5bcaff3d7 100644 --- a/docs/source/key_components/data_model.rst +++ b/docs/source/key_components/data_model.rst @@ -11,15 +11,13 @@ implementation based on their real performance requirement and device limitation Key Concepts ------------ - .. 
image:: ../images/simulator/key_concepts.svg :target: ../images/simulator/key_concepts.svg :alt: Key Concepts - + :width: 220 As shown in the figure above, there are some key concepts in the data model: - * **Node** is the abstraction of the resource holder, which is usually the major business instance of the scenario (i.e. vessels and ports in CIM scenario). A node usually has various attributes to present the business nature. @@ -36,113 +34,98 @@ As shown in the figure above, there are some key concepts in the data model: * **Snapshot List** is the dumped frames based on a pre-defined resolution. It captures the aggregated changes of the environment between the dump points. - -.. raw:: html - - - - - Use Case -------- - * Below is the declaration of a retail frame, which includes warehouse and store nodes. -.. code-block:: python - - from maro.backends.frame import node, NodeAttribute, NodeBase, FrameNode, FrameBase + .. code-block:: python - TOTAL_PRODUCT_CATEGORIES = 10 - TOTAL_STORES = 8 - TOTAL_WAREHOUSES = 2 - TOTAL_SNAPSHOT = 100 + from maro.backends.frame import node, NodeAttribute, NodeBase, FrameNode, FrameBase + TOTAL_PRODUCT_CATEGORIES = 10 + TOTAL_STORES = 8 + TOTAL_WAREHOUSES = 2 + TOTAL_SNAPSHOT = 100 - @node("warehouse") - class Warehouse(NodeBase): - inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - def __init__(self): - self._init_inventories = [100 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)] - self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES + @node("warehouse") + class Warehouse(NodeBase): + inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) + shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - def reset(self): - self.inventories[:] = self._init_inventories - self.shortages[:] = self._init_shortages + def __init__(self): + self._init_inventories = [100 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)] + self._init_shortages = [0] * 
TOTAL_PRODUCT_CATEGORIES + def reset(self): + self.inventories[:] = self._init_inventories + self.shortages[:] = self._init_shortages - @node("store") - class Store(NodeBase): - inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - sales = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - def __init__(self): - self._init_inventories = [10 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)] - self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES - self._init_sales = [0] * TOTAL_PRODUCT_CATEGORIES + @node("store") + class Store(NodeBase): + inventories = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) + shortages = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) + sales = NodeAttribute("i", TOTAL_PRODUCT_CATEGORIES) - def reset(self): - self.inventories[:] = self._init_inventories - self.shortages[:] = self._init_shortages - self.sales[:] = self._init_sales + def __init__(self): + self._init_inventories = [10 * (i + 1) for i in range(TOTAL_PRODUCT_CATEGORIES)] + self._init_shortages = [0] * TOTAL_PRODUCT_CATEGORIES + self._init_sales = [0] * TOTAL_PRODUCT_CATEGORIES + def reset(self): + self.inventories[:] = self._init_inventories + self.shortages[:] = self._init_shortages + self.sales[:] = self._init_sales - class RetailFrame(FrameBase): - warehouses = FrameNode(Warehouse, TOTAL_WAREHOUSES) - stores = FrameNode(Store, TOTAL_STORES) - def __init__(self): - # If your actual frame number was more than the total snapshot number, the old snapshots would be rolling replaced. - super().__init__(enable_snapshot=True, total_snapshot=TOTAL_SNAPSHOT) + class RetailFrame(FrameBase): + warehouses = FrameNode(Warehouse, TOTAL_WAREHOUSES) + stores = FrameNode(Store, TOTAL_STORES) + def __init__(self): + # If your actual frame number was more than the total snapshot number, the old snapshots would be rolling replaced. + super().__init__(enable_snapshot=True, total_snapshot=TOTAL_SNAPSHOT) * The operations on the retail frame. 
-.. code-block:: python + .. code-block:: python - retail_frame = RetailFrame() + retail_frame = RetailFrame() - # Fulfill the initialization values to the backend memory. - for store in retail_frame.stores: - store.reset() + # Fulfill the initialization values to the backend memory. + for store in retail_frame.stores: + store.reset() - # Fulfill the initialization values to the backend memory. - for warehouse in retail_frame.warehouses: - warehouse.reset() + # Fulfill the initialization values to the backend memory. + for warehouse in retail_frame.warehouses: + warehouse.reset() - # Take a snapshot of the first tick frame. - retail_frame.take_snapshot(0) - snapshot_list = retail_frame.snapshots - print(f"Max snapshot list capacity: {len(snapshot_list)}") + # Take a snapshot of the first tick frame. + retail_frame.take_snapshot(0) + snapshot_list = retail_frame.snapshots + print(f"Max snapshot list capacity: {len(snapshot_list)}") - # Query sales, inventory information of all stores at first tick, len(snapshot_list["store"]) equals to TOTAL_STORES. - all_stores_info = snapshot_list["store"][0::["sales", "inventories"]].reshape(TOTAL_STORES, -1) - print(f"All stores information at first tick (numpy array): {all_stores_info}") + # Query sales, inventory information of all stores at first tick, len(snapshot_list["store"]) equals to TOTAL_STORES. + all_stores_info = snapshot_list["store"][0::["sales", "inventories"]].reshape(TOTAL_STORES, -1) + print(f"All stores information at first tick (numpy array): {all_stores_info}") - # Query shortage information of first store at first tick. - first_store_shortage = snapshot_list["store"][0:0:"shortages"] - print(f"First store shortages at first tick (numpy array): {first_store_shortage}") + # Query shortage information of first store at first tick. 
+ first_store_shortage = snapshot_list["store"][0:0:"shortages"] + print(f"First store shortages at first tick (numpy array): {first_store_shortage}") - # Query inventory information of all warehouses at first tick, len(snapshot_list["warehouse"]) equals to TOTAL_WAREHOUSES. - all_warehouses_info = snapshot_list["warehouse"][0::"inventories"].reshape(TOTAL_WAREHOUSES, -1) - print(f"All warehouses information at first tick (numpy array): {all_warehouses_info}") + # Query inventory information of all warehouses at first tick, len(snapshot_list["warehouse"]) equals to TOTAL_WAREHOUSES. + all_warehouses_info = snapshot_list["warehouse"][0::"inventories"].reshape(TOTAL_WAREHOUSES, -1) + print(f"All warehouses information at first tick (numpy array): {all_warehouses_info}") - # Add fake shortages to first store. - retail_frame.stores[0].shortages[:] = [i + 1 for i in range(TOTAL_PRODUCT_CATEGORIES)] - retail_frame.take_snapshot(1) + # Add fake shortages to first store. + retail_frame.stores[0].shortages[:] = [i + 1 for i in range(TOTAL_PRODUCT_CATEGORIES)] + retail_frame.take_snapshot(1) - # Query shortage information of first and second store at first and second tick. - store_shortage_history = snapshot_list["store"][[0, 1]: [0, 1]: "shortages"].reshape(2, -1) - print(f"First and second store shortage history at the first and second tick (numpy array): {store_shortage_history}") + # Query shortage information of first and second store at first and second tick. + store_shortage_history = snapshot_list["store"][[0, 1]: [0, 1]: "shortages"].reshape(2, -1) + print(f"First and second store shortage history at the first and second tick (numpy array): {store_shortage_history}") Supported Attribute Data Type ----------------------------- @@ -150,6 +133,7 @@ Supported Attribute Data Type All supported data types for the attribute of the node: .. 
list-table:: + :widths: 25 25 60 :header-rows: 1 * - Attribute Data Type @@ -171,29 +155,26 @@ All supported data types for the attribute of the node: - double - -1.7E308 .. 1.7E308 - Advanced Features ----------------- For better data access, we also provide some advanced features, including: - * **Attribute value change handler**\ : It is a hook function for the value change event on a specific attribute. The member function with the ``_on_{attribute_name}_changed`` naming pattern will be automatically invoked when the related attribute value changed. Below is the example code: -.. code-block:: python + .. code-block:: python - from maro.backends.frame import node, NodeBase, NodeAttribute + from maro.backends.frame import node, NodeBase, NodeAttribute - @node("test_node") - class TestNode(NodeBase): - test_attribute = NodeAttribute("i") - - def _on_test_attribute_changed(self, value: int): - pass + @node("test_node") + class TestNode(NodeBase): + test_attribute = NodeAttribute("i") + def _on_test_attribute_changed(self, value: int): + pass * **Snapshot list slicing**\ : It provides a slicing interface for querying temporal (frame), spatial (node), intra-node (attribute) information. Both a @@ -201,39 +182,37 @@ For better data access, we also provide some advanced features, including: node(s), and attribute(s), while the empty means querying all. The return value is a flattened 1-dimension NumPy array, which aligns with the slicing order as below: + .. image:: ../images/simulator/snapshot_list_slicing.svg + :target: ../images/simulator/snapshot_list_slicing.svg + :alt: Snapshot List Slicing -.. image:: ../images/simulator/snapshot_list_slicing.svg - :target: ../images/simulator/snapshot_list_slicing.svg - :alt: Snapshot List Slicing - - -.. code-block:: python + .. code-block:: python - snapshot_list = env.snapshot_list + snapshot_list = env.snapshot_list - # Get max size of snapshots (in memory). 
- print(f"Max snapshot size: {len(snapshot_list)}") + # Get max size of snapshots (in memory). + print(f"Max snapshot size: {len(snapshot_list)}") - # Get snapshots of a specific node type. - test_nodes_snapshots = snapshot_list["test_nodes"] + # Get snapshots of a specific node type. + test_nodes_snapshots = snapshot_list["test_nodes"] - # Get node instance amount. - print(f"Number of test_nodes in the frame: {len(test_nodes_snapshots)}") + # Get node instance amount. + print(f"Number of test_nodes in the frame: {len(test_nodes_snapshots)}") - # Query one attribute on all frames and nodes. - states = test_nodes_snapshots[::"int_attribute"] + # Query one attribute on all frames and nodes. + states = test_nodes_snapshots[::"int_attribute"] - # Query two attributes on all frames and nodes. - states = test_nodes_snapshots[::["int_attribute", "float_attribute"]] + # Query two attributes on all frames and nodes. + states = test_nodes_snapshots[::["int_attribute", "float_attribute"]] - # Query one attribute on all frame and the first node. - states = test_nodes_snapshots[:0:"int_attribute"] + # Query one attribute on all frame and the first node. + states = test_nodes_snapshots[:0:"int_attribute"] - # Query attribute by node index list. - states = test_nodes_snapshots[:[0, 1, 2]:"int_attribute"] + # Query attribute by node index list. + states = test_nodes_snapshots[:[0, 1, 2]:"int_attribute"] - # Query one attribute on the first frame and the first node. - states = test_nodes_snapshots[0:0:"int_attribute"] + # Query one attribute on the first frame and the first node. + states = test_nodes_snapshots[0:0:"int_attribute"] - # Query attribute by frame index list. - states = test_nodes_snapshots[[0, 1, 2]: 0: "int_attribute"] + # Query attribute by frame index list. 
+ states = test_nodes_snapshots[[0, 1, 2]: 0: "int_attribute"] diff --git a/docs/source/key_components/distributed_toolkit.rst b/docs/source/key_components/distributed_toolkit.rst index a0df12d93..9f231de40 100644 --- a/docs/source/key_components/distributed_toolkit.rst +++ b/docs/source/key_components/distributed_toolkit.rst @@ -5,18 +5,15 @@ Distributed Toolkit MARO distributed toolkit provides a unified, fast, and infrastructure-independent interface to support RL distributed training. - .. image:: ../images/distributed/overview.svg :target: ../images/distributed/overview.svg :alt: Overview - As shown in the overall architecture diagram above, MARO distributed toolkit follows a message-passing pattern that the cooperation between different components is based on the messages sending and receiving. A typical master/worker distributed program usually contains the following steps: - #. The master component will send tasks(w/ or w/o data) to the worker components; #. The worker components will finish the tasks in their local computing environments or the local devices; @@ -30,22 +27,17 @@ Key Components There are two key components in the distributed toolkit: - .. image:: ../images/distributed/key_components.svg :target: ../images/distributed/key_components.svg :alt: Key Components - - -* - **Communication**\ : It provides the general message passing interfaces, such as +* **Communication**\ : It provides the general message passing interfaces, such as ``(i)send``\ , ``receive``\ , ``(i)broadcast``\ , ``(i)scatter``\ , etc. The communication component use a replaceable communication protocol driver to adopt different communication protocol stack (e.g. `TCP/IP `_\ , `InfiniBand `_\ %20is%20a%20computer,both%20among%20and%20within%20computers.) ). Check the `distributed communication <./communication.html>`_ to get more details. 
-* - **Orchestration**\ : It primarily provides a unified interface for cluster +* **Orchestration**\ : It primarily provides a unified interface for cluster management and job management on different infrastructures. Check the `distributed orchestration <./orchestration.html>`_ to get more details. diff --git a/docs/source/key_components/event_buffer.rst b/docs/source/key_components/event_buffer.rst index 5188e6746..72a18debf 100644 --- a/docs/source/key_components/event_buffer.rst +++ b/docs/source/key_components/event_buffer.rst @@ -22,11 +22,10 @@ the FIFO rule. Currently, only a single-thread version event buffer is provided. # Execute events at a specific tick. executed_events = event_buffer.execute(tick) - .. image:: ../images/simulator/event_buffer.svg :target: ../images/simulator/event_buffer.svg :alt: Event Buffer - + :width: 700 Event Category -------------- @@ -34,7 +33,6 @@ Event Category To simplify the implementation of the business logic, MARO provides two kinds of basic event types, which can be used to construct various event execution pattern: - * **Atom event** is an event without any dependence. An atom event will be immediately popped out from the event buffer after execution. * **Cascade event** is a series of events with dependencies. An internal event @@ -49,7 +47,6 @@ Event Format We provide a general-purpose event format for all the scenarios in MARO. A legal event generally contains the following properties: - * **tick** (int): The execution tick of this event. * **event_type** (int): The type of this event. It is a customized field, the default value is 0 (PREDEFINE_EVENT_ACTION). @@ -64,8 +61,6 @@ event generally contains the following properties: * **state** (EventState): The state of this event. Valid values include PENDING, EXECUTING and FINISHED. The figure below indicates the state changing of an event: - .. 
image:: ../images/simulator/event_state.svg :target: ../images/simulator/event_state.svg :alt: Event State - diff --git a/docs/source/key_components/orchestration.rst b/docs/source/key_components/orchestration.rst index 67668be97..4761437bb 100644 --- a/docs/source/key_components/orchestration.rst +++ b/docs/source/key_components/orchestration.rst @@ -10,11 +10,10 @@ are dockerized for easy deployment and resource allocation. It provides a unifie abstraction/interface for different orchestration frameworks (e.g. `Grass <#grass>`_\ , `Kubernetes <#kubernetes>`_\ ). - .. image:: ../images/distributed/orch_overview.svg :target: ../images/distributed/orch_overview.svg :alt: Orchestration Overview - + :width: 600 Grass ----- @@ -24,14 +23,12 @@ confidently applied to small/middle size cluster (< 200 nodes). The design goal of Grass is to speed up the distributed algorithm prototype development. It has the following advantages: - * Fast deployment in a small cluster. * Fine-grained resource management. * Lightweight, no other dependencies are required. In the Grass mode: - * All VMs will be deployed in the same virtual network for a faster, more stable connection and larger bandwidth. Please note that the maximum number of VMs is limited by the `available dedicated IP addresses `_. @@ -43,11 +40,10 @@ In the Grass mode: Check `Grass Cluster Provisioning on Azure <../installation/grass_cluster_provisioning_on_azure.html>`_ to learn how to use it. - .. image:: ../images/distributed/orch_grass.svg :target: ../images/distributed/orch_grass.svg :alt: Orchestration Grass Mode in Azure - + :width: 600 Kubernetes ---------- @@ -56,13 +52,11 @@ MARO also supports Kubernetes (k8s) as an orchestration option. With this widely used framework, you can easily build up your training cluster with hundreds or thousands of nodes. It has the following advantages: - * Higher durability. * Better scalability. 
In the Kubernetes mode: - * The dockerized job component runs in Kubernetes pod, and each pod only hosts one component. * All Kubernetes pods are registered into the same virtual network using @@ -71,8 +65,7 @@ In the Kubernetes mode: Check `K8S Cluster Provisioning on Azure <../installation/k8s_cluster_provisioning_on_azure.html>`_ to get how to use it. - .. image:: ../images/distributed/orch_k8s.svg :target: ../images/distributed/orch_k8s.svg :alt: Orchestration K8S Mode in Azure - + :width: 600 diff --git a/docs/source/key_components/rl_toolkit.rst b/docs/source/key_components/rl_toolkit.rst index 635aa9597..857023da8 100644 --- a/docs/source/key_components/rl_toolkit.rst +++ b/docs/source/key_components/rl_toolkit.rst @@ -12,61 +12,57 @@ scenarios in a scalable way. The main abstractions include Learner and Actor ----------------- - .. image:: ../images/rl/overview.svg :target: ../images/rl/overview.svg :alt: RL Overview - - * **Learner** is the abstraction of the learnable policy. It is responsible for learning a qualified policy to improve the business optimized object. -.. code-block:: python + .. code-block:: python - # Train function of learner. - def train(self, total_episodes): - for current_ep in range(total_episodes): - models = self._trainable_agents.get_models() - performance, experiences = self._actor.roll_out(models=models, - epsilons=self._trainable_agents.explorer.epsilons, - seed=self._seed) - - self._trainable_agents.store_experiences(experiences) - self._trainable_agents.train() - self._trainable_agents.update_epsilon(performance) + # Train function of learner. 
+ def train(self, total_episodes): + for current_ep in range(total_episodes): + models = self._trainable_agents.get_models() + performance, experiences = self._actor.roll_out(models=models, + epsilons=self._trainable_agents.explorer.epsilons, + seed=self._seed) + self._trainable_agents.store_experiences(experiences) + self._trainable_agents.train() + self._trainable_agents.update_epsilon(performance) * **Actor** is the abstraction of experience collection. It is responsible for interacting with the environment and collecting experience. The experiences collected during interaction will be used for the training of the learners. -.. code-block:: python + .. code-block:: python - # Rollout function of actor. - def roll_out(self, models=None, epsilons=None, seed: int = None): - self._env.set_seed(seed) + # Rollout function of actor. + def roll_out(self, models=None, epsilons=None, seed: int = None): + self._env.set_seed(seed) - # Assign epsilon - if epsilons is not None: - self._inference_agents.explorer.epsilons = epsilons + # Assign epsilon + if epsilons is not None: + self._inference_agents.explorer.epsilons = epsilons - # Load models - if models is not None: - self._inference_agents.load_models(models) + # Load models + if models is not None: + self._inference_agents.load_models(models) - metrics, decision_event, is_done = self._env.step(None) + metrics, decision_event, is_done = self._env.step(None) - while not is_done: - action = self._inference_agents.choose_action(decision_event, self._env.snapshot_list) - metrics, decision_event, is_done = self._env.step(action) - self._inference_agents.on_env_feedback(metrics) + while not is_done: + action = self._inference_agents.choose_action(decision_event, self._env.snapshot_list) + metrics, decision_event, is_done = self._env.step(action) + self._inference_agents.on_env_feedback(metrics) - experiences = self._inference_agents.post_process(self._env.snapshot_list) - performance = self._env.metrics - self._env.reset() + 
experiences = self._inference_agents.post_process(self._env.snapshot_list) + performance = self._env.metrics + self._env.reset() - return {'local': performance}, experiences + return {'local': performance}, experiences Agent Manager ------------- @@ -95,12 +91,10 @@ Furthermore, to well serve the distributed algorithm (scalable), the agent manager provides two kinds of working modes, which can be applied in different distributed components, such as inference mode in actor, training mode in learner. - .. image:: ../images/rl/agent_manager.svg :target: ../images/rl/agent_manager.svg :alt: Agent Manager - - + :width: 750 * In **inference mode**\ , the agent manager is responsible to access and shape the environment state for the related agent, convert the model action to an @@ -119,12 +113,10 @@ experience storage, sampling strategies, and training strategies. Since all kind of scenario-specific stuff will be handled by the agent manager, the agent is scenario agnostic. - .. image:: ../images/rl/agent.svg :target: ../images/rl/agent.svg :alt: Agent - .. code-block:: python class Agent(object): @@ -148,7 +140,6 @@ scenario agnostic. Under the management of the agent manager: - * In **inference mode**\ , given the shaped model state as input, the agent will output a model action (then the agent manager will shape it into an executable environment action). Also, at the end of each episode, the agent will fill the @@ -164,12 +155,10 @@ problem. The model architecture, loss function, optimizer, and internal model update strategy are designed and parameterized here. In this module, two predefined interfaces must be implemented: - .. image:: ../images/rl/algorithm.svg :target: ../images/rl/algorithm.svg :alt: Algorithm - - + :width: 650 * ``choose_action`` is used to make a decision based on a provided model state. * ``train_on_batch`` is used to trigger training and the policy update from external. 
@@ -201,7 +190,6 @@ MARO uses shapers to isolate business-related details and the algorithm modeling It provides a clean interactive surface for RL agent(s). The followings are the three usually used shapers in RL formulations: - * **State shaper**\ : Given a decision event, the state shaper will extract relevant temporal-spatial information from the environment (snapshot list) for the decision agent. The output usually follows a format that can be directly inputted to the @@ -229,27 +217,3 @@ three usually used shapers in RL formulations: experiences. By default, we provide a ``k-step return`` experience shaper for general usage, but for better performance, you need to carefully design this part according to your scenario and needs. - - -.. raw:: html - - - - - - -.. raw:: html - - - diff --git a/docs/source/key_components/simulation_toolkit.rst b/docs/source/key_components/simulation_toolkit.rst index b27ed18d4..59f9022f4 100644 --- a/docs/source/key_components/simulation_toolkit.rst +++ b/docs/source/key_components/simulation_toolkit.rst @@ -15,7 +15,6 @@ Overview The key features of MARO simulation toolkit: - * Event-driven, which is friendly to business logging data, and more in line with real-world scenarios; * High execution performance; @@ -39,11 +38,10 @@ pluggable `Business Engine <./business_engine.html>`_ based on the `Event Buffer <./event_buffer.html>`_ supporting. The following figure illustrates the overall architecture of the MARO simulation system. - .. image:: ../images/simulator/overview.svg :target: ../images/simulator/overview.svg :alt: Simulation System Overall - + :width: 600 Environment ----------- @@ -61,12 +59,10 @@ and how to conveniently query data on the levels of spatial, temporal, and intra-node (resource holder). The following shows a typical environment interaction workflow and code snippet. - .. 
image:: ../images/simulator/interaction_workflow.svg :target: ../images/simulator/interaction_workflow.svg :alt: Agent(s)/Environment Interaction Workflow - .. code-block:: python from maro.simulator import Env diff --git a/docs/source/scenarios/citi_bike.rst b/docs/source/scenarios/citi_bike.rst index 5b67d7a41..8b09db207 100644 --- a/docs/source/scenarios/citi_bike.rst +++ b/docs/source/scenarios/citi_bike.rst @@ -15,7 +15,6 @@ one-way bike trips based on the public trip data from to work or school, run errands, get to appointments or social engagements, and more. - Since the demand for bikes and empty docks changes dynamically during a day, and the bike flows between two stations are not equal in the same period, some stations suffer from severe bike shortages, while some have too many bikes and @@ -32,7 +31,6 @@ Resource Flow In this scenario, the **bike** is the central resource. Two events will trigger the movement of the bike: - * The first one is the trip requirement, which may cause the bike transfer from the source station to the destination station; * The second one is the repositioning operation. It is used to rebalance the bike @@ -44,7 +42,6 @@ Bike Trip In the Citi Bike scenario in MARO, the trip generation and the corresponding bike flow are defined as follows: - * Given a fixed time interval, for each specific source-destination station pair, a trip requirement will arise according to a predefined distribution or the real trip data. It depends on the chosen topology. @@ -72,7 +69,6 @@ in the topology and can be customized based on different requirements. The target station candidates of the ``Supply`` and ``Demand`` events are selected by a predefined multi-layer filter in this scenario: - #. The distance between the caller station and the neighboring stations will be used to filter and get a specific number of stations; #. 
The number of available bikes at each candidate station will be used to further @@ -118,12 +114,10 @@ In toy topology, the generation of the trip requirements follows a stable patter as introduced above. The detailed trip demand pattern are listed as below. And we hope that these toy topologies can provide you with some insights about this scenario. - .. image:: ../images/scenario/citibike.toys.svg :target: ../images/scenario/citibike.toys.svg :alt: Citi Bike toy topologies - **toy.3s_4t**\ : There are three stations in this topology. Every two minutes, there will be a trip requirement from S2 to S3 and a trip requirement from S3 to S2. At the same time, every two minutes, the system will generate trip requirement @@ -192,6 +186,7 @@ No Repositioning - 9,231 - 0 +| .. list-table:: :header-rows: 1 @@ -249,6 +244,7 @@ No Repositioning - 15,876 - 0 +| .. list-table:: :header-rows: 1 @@ -306,6 +302,7 @@ No Repositioning - 14,205 - 0 +| .. list-table:: :header-rows: 1 @@ -339,7 +336,6 @@ No Repositioning - 7,608 - 0 - Random Repositioning ~~~~~~~~~~~~~~~~~~~~ @@ -363,6 +359,7 @@ Random Repositioning - 7,513 :math:`\pm` 40 - 3,242 :math:`\pm` 71 +| .. list-table:: :header-rows: 1 @@ -420,6 +417,7 @@ Random Repositioning - 26,067 :math:`\pm` 234 - 457,173 :math:`\pm` 6,444 +| .. list-table:: :header-rows: 1 @@ -477,6 +475,7 @@ Random Repositioning - 20,119 :math:`\pm` 110 - 437,311 :math:`\pm` 5,936 +| .. list-table:: :header-rows: 1 @@ -510,7 +509,6 @@ Random Repositioning - 11,741 :math:`\pm` 170 - 349,932 :math:`\pm` 4,375 - Quick Start ----------- @@ -545,7 +543,6 @@ The data ``generate`` command is used to automatically download and build the sp predefined scenario and topology data files for the simulation. Currently, there are three arguments for the data ``generate`` command: - * ``-s``\ : required, used to specify the predefined scenario. Valid scenarios are listed in the result of `environment list command <#environment-list-command>`_. 
* ``-t``\ : required, used to specify the predefined topology. Valid topologies are @@ -580,7 +577,6 @@ The data ``convert`` command is used to convert the CSV data files to binary dat files that the simulator needs. Currently, there are three arguments for the data ``convert`` command: - * ``--meta``\ : required, used to specify the path of the meta file. The source columns to be converted and the data type of each column should be specified in the meta file. @@ -608,7 +604,6 @@ Once the environment need the agent's response to reposition bikes, it will throw a ``DecisionEvent``. In the scenario of Citi Bike, the information of each ``DecisionEvent`` is listed below: - * **station_idx** (int): The id of the station/agent that needs to respond to the environment. * **tick** (int): The corresponding tick. @@ -641,7 +636,6 @@ Action Once we get a ``DecisionEvent`` from the environment, we should respond with an ``Action``. Valid ``Action`` could be: - * ``None``\ , which means do nothing. * A valid ``Action`` instance, including: @@ -748,5 +742,5 @@ random mode, we hope this could help you learn how to use the environment interf print(f"ep: {ep}, environment metrics: {env.metrics}") env.reset() -Jump to `this notebook `_ +Jump to `this notebook `_ for a quick experience. diff --git a/docs/source/scenarios/container_inventory_management.rst b/docs/source/scenarios/container_inventory_management.rst index 3d2927ec5..b970fb943 100644 --- a/docs/source/scenarios/container_inventory_management.rst +++ b/docs/source/scenarios/container_inventory_management.rst @@ -17,18 +17,15 @@ Resource Flow In this scenario, the **container** is the central resource. Two events will trigger the movement of the container: - * The first one is the order, which will lead to the transportation of goods from the source port to the destination port. * The second one is the repositioning operation. It is used to rebalance the container distribution worldwide. - .. 
image:: ../images/scenario/cim.container_flow.svg :target: ../images/scenario/cim.container_flow.svg :alt: The Life Cycle of the Container - Order ^^^^^ @@ -43,7 +40,6 @@ customized based on different requirements. An order will trigger a life cycle of a container, as shown in the figure above, a life cycle is defined as follows: - * Once an order is generated, an empty container of the corresponding export port (source port) will be released to the shipper. * The shipper will fill the container with cargo which turns it into a laden and @@ -63,7 +59,6 @@ As mentioned above, to rebalance the container distribution, the agent in each port will decide how to reposition the empty containers every time a vessel arrives at the port. The decision consists of two parts: - * Whether to take a ``discharge`` operation or a ``load`` operation; * The number of containers to discharge/load. @@ -95,12 +90,10 @@ problem. Toy Topologies ^^^^^^^^^^^^^^ - .. image:: ../images/scenario/cim.toys.svg :target: ../images/scenario/cim.toys.svg :alt: CIM toy topologies - *(In these topologies, the solid lines indicate the service route (voyage) among ports, while the dashed lines indicate the container flow triggered by orders.)* @@ -143,12 +136,10 @@ ports no longer have a simple supply/demand feature. The cooperation among ports is much more complex and it is difficult to find an efficient repositioning policy manually. - .. image:: ../images/scenario/cim.global_trade.svg :target: ../images/scenario/cim.global_trade.svg :alt: global_trade.22p - *(To make it clearer, the figure above only shows the service routes among ports.)* Naive Baseline @@ -205,6 +196,7 @@ No Repositioning - 2,191,716 - 0 +| .. list-table:: :header-rows: 1 @@ -250,6 +242,7 @@ No Repositioning - 2,141,716 - 0 +| .. list-table:: :header-rows: 1 @@ -295,6 +288,7 @@ No Repositioning - 2,088,716 - 0 +| .. 
list-table:: :header-rows: 1 @@ -340,7 +334,6 @@ No Repositioning - 1,161,714 - 0 - Random Repositioning ~~~~~~~~~~~~~~~~~~~~ @@ -388,6 +381,7 @@ Random Repositioning - 1,603,063 :math:`\pm` 109,149 - 2,518,920 :math:`\pm` 193,200 +| .. list-table:: :header-rows: 1 @@ -433,6 +427,7 @@ Random Repositioning - 1,763,950 :math:`\pm` 73,935 - 1,858,420 :math:`\pm` 60,680 +| .. list-table:: :header-rows: 1 @@ -478,6 +473,7 @@ Random Repositioning - 1,761,283 :math:`\pm` 22,338 - 1,336,540 :math:`\pm` 30,020 +| .. list-table:: :header-rows: 1 @@ -523,7 +519,6 @@ Random Repositioning - 1,154,184 :math:`\pm` 7,043 - 270,960 :math:`\pm` 2,240 - Quick Start ----------- @@ -549,7 +544,6 @@ Once the environment need the agent's response to promote the simulation, it wil throw a ``DecisionEvent``. In the scenario of CIM, the information of each ``DecisionEvent`` is listed below: - * **tick** (int): The corresponding tick. * **port_idx** (int): The id of the port/agent that needs to respond to the environment. @@ -572,7 +566,6 @@ Action Once we get a ``DecisionEvent`` from the environment, we should respond with an ``Action``. Valid ``Action`` could be: - * ``None``\ , which means do nothing. * A valid ``Action`` instance, including: @@ -646,5 +639,5 @@ random mode, we hope this could help you learn how to use the environment interf print(f"ep: {ep}, environment metrics: {env.metrics}") env.reset() -Jump to `this notebook `_ +Jump to `this notebook `_ for a quick experience.