diff --git a/README.md b/README.md
index bfb4f050d..2750a51cd 100644
--- a/README.md
+++ b/README.md
@@ -121,14 +121,16 @@ of user-defined functions for message auto-handling, cluster provision, and job
```sh
# Install MARO from source.
- bash scripts/install_maro.sh
+ bash scripts/install_maro.sh;
+ pip install -r ./requirements.dev.txt;
```
- Windows
```powershell
# Install MARO from source.
- .\scripts\install_maro.bat
+ .\scripts\install_maro.bat;
+ pip install -r ./requirements.dev.txt;
```
- *Notes: If your package is not found, remember to set your PYTHONPATH*
diff --git a/docs/source/key_components/data_model.rst b/docs/source/key_components/data_model.rst
index 28b9e910c..cd037b552 100644
--- a/docs/source/key_components/data_model.rst
+++ b/docs/source/key_components/data_model.rst
@@ -326,236 +326,84 @@ In CIM scenario, there are 3 node types:
port
++++
-capacity
-********
-
-type: int
-slots: 1
-
-The capacity of port for stocking containers.
-
-empty
-*****
-
-type: int
-slots: 1
-
-Empty container volume on the port.
-
-full
-****
-
-type: int
-slots: 1
-
-Laden container volume on the port.
-
-on_shipper
-**********
-
-type: int
-slots: 1
-
-Empty containers, which are released to the shipper.
-
-on_consignee
-************
-
-type: int
-slots: 1
-
-Laden containers, which are delivered to the consignee.
-
-shortage
-********
-
-type: int
-slots: 1
-
-Per tick state. Shortage of empty container at current tick.
-
-acc_storage
-***********
-
-type: int
-slots: 1
-
-Accumulated shortage number to the current tick.
-
-booking
-*******
-
-type: int
-slots: 1
-
-Per tick state. Order booking number of a port at the current tick.
-
-acc_booking
-***********
-
-type: int
-slots: 1
-
-Accumulated order booking number of a port to the current tick.
-
-fulfillment
-***********
-
-type: int
-slots: 1
-
-Fulfilled order number of a port at the current tick.
-
-acc_fulfillment
-***************
-
-type: int
-slots: 1
-
-Accumulated fulfilled order number of a port to the current tick.
-
-transfer_cost
-*************
-
-type: float
-slots: 1
-
-Cost of transferring container, which also covers loading and discharging cost.
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| Field | Type | Slots | Description |
++==================+=======+========+==================================================================================+
+| capacity | int | 1 | The capacity of port for stocking containers. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| empty | int | 1 | Empty container volume on the port. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| full | int | 1 | Laden container volume on the port. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| on_shipper | int | 1 | Empty containers, which are released to the shipper. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| on_consignee | int | 1 | Laden containers, which are delivered to the consignee. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| shortage | int | 1 | Per tick state. Shortage of empty container at current tick. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| acc_storage | int | 1 | Accumulated shortage number to the current tick. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| booking | int | 1 | Per tick state. Order booking number of a port at the current tick. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| acc_booking | int | 1 | Accumulated order booking number of a port to the current tick. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| fulfillment | int | 1 | Fulfilled order number of a port at the current tick. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| acc_fulfillment | int | 1 | Accumulated fulfilled order number of a port to the current tick. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
+| transfer_cost | float | 1 | Cost of transferring container, which also covers loading and discharging cost. |
++------------------+-------+--------+----------------------------------------------------------------------------------+
vessel
++++++
-capacity
-********
-
-type: int
-slots: 1
-
-The capacity of vessel for transferring containers.
-
-NOTE:
-This attribute is ignored in current implementation.
-
-empty
-*****
-
-type: int
-slots: 1
-
-Empty container volume on the vessel.
-
-full
-****
-
-type: int
-slots: 1
-
-Laden container volume on the vessel.
-
-remaining_space
-***************
-
-type: int
-slots: 1
-
-Remaining space of the vessel.
-
-early_discharge
-***************
-
-type: int
-slots: 1
-
-Discharged empty container number for loading laden containers.
-
-route_idx
-*********
-
-type: int
-slots: 1
-
-Which route current vessel belongs to.
-
-last_loc_idx
-************
-
-type: int
-slots: 1
-
-Last stop port index in route, it is used to identify where is current vessel.
-
-next_loc_idx
-************
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Field | Type | Slots | Description |
++========================+========+==========+=================================================================================================================================================================================================================================+
+| capacity | int | 1 | The capacity of vessel for transferring containers. NOTE: This attribute is ignored in current implementation. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| empty | int | 1 | Empty container volume on the vessel. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| full | int | 1 | Laden container volume on the vessel. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| remaining_space | int | 1 | Remaining space of the vessel. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| early_discharge | int | 1 | Discharged empty container number for loading laden containers. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| is_parking | short | 1 | Is parking or not |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| loc_port_idx | int | 1 | The port index the vessel is parking at. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| route_idx | int | 1 | Which route current vessel belongs to. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| last_loc_idx | int | 1 | Last stop port index in route, it is used to identify where is current vessel. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| next_loc_idx | int | 1 | Next stop port index in route, it is used to identify where is current vessel. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| past_stop_list | int | dynamic | NOTE: This and following attribute are special, that its slot number is determined by configuration, but different with a list attribute, its slot number is fixed at runtime. Stop indices that we have stopped in the past. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| past_stop_tick_list | int | dynamic | Ticks that we stopped at the port in the past. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| future_stop_list | int | dynamic | Stop indices that we will stop in the future. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| future_stop_tick_list | int | dynamic | Ticks that we will stop in the future. |
++------------------------+--------+----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-type: int
-slots: 1
-
-Next stop port index in route, it is used to identify where is current vessel.
-
-past_stop_list
-**************
-
-type: int
-slots: dynamic
-
-NOTE:
-This and following attribute are special, that its slot number is determined by configuration,
-but different with a list attribute, its slot number is fixed at runtime.
-
-Stop indices that we have stopped in the past.
-
-past_stop_tick_list
-*******************
-
-type: int
-slots: dynamic
-
-Ticks that we stopped at the port in the past.
-
-future_stop_list
-****************
-
-type: int
-slots: dynamic
-
-Stop indices that we will stop in the future.
-
-future_stop_tick_list
-*********************
-
-type: int
-slots: dynamic
-
-Ticks that we will stop in the future.
matrices
++++++++
Matrices node is used to store big matrix for ports, vessels and containers.
-full_on_ports
-*************
-
-type: int
-slots: port number * port number
-
-Distribution of full from port to port.
-
-full_on_vessels
-***************
-
-type: int
-slots: vessel number * port number
-
-Distribution of full from vessel to port.
-
-vessel_plans
-************
-
-type: int
-slots: vessel number * port number
-
-Planed route info for vessels.
++------------------+-------+------------------------------+---------------------------------------------+
+| Field | Type | Slots | Description |
++==================+=======+==============================+=============================================+
+| full_on_ports | int | port number * port number | Distribution of full from port to port. |
++------------------+-------+------------------------------+---------------------------------------------+
+| full_on_vessels | int | vessel number * port number | Distribution of full from vessel to port. |
++------------------+-------+------------------------------+---------------------------------------------+
+| vessel_plans     | int   | vessel number * port number  | Planned route info for vessels.             |
++------------------+-------+------------------------------+---------------------------------------------+
How to
~~~~~~
@@ -597,133 +445,47 @@ Nodes and attributes in scenario
station
+++++++
-bikes
-*****
-
-type: int
-slots: 1
-
-How many bikes avaiable in current station.
-
-shortage
-********
-
-type: int
-slots: 1
-
-Per tick state. Lack number of bikes in current station.
-
-trip_requirement
-****************
-
-type: int
-slots: 1
-
-Per tick states. How many requirements in current station.
-
-fulfillment
-***********
-
-type: int
-slots: 1
-
-How many requirement is fit in current station.
-
-capacity
-********
-
-type: int
-slots: 1
-
-Max number of bikes this station can take.
-
-id
-+++
-
-type: int
-slots: 1
-
-Id of current station.
-
-weekday
-*******
-
-type: short
-slots: 1
-
-Weekday at current tick.
-
-temperature
-***********
-
-type: short
-slots: 1
-
-Temperature at current tick.
-
-weather
-*******
-
-type: short
-slots: 1
-
-Weather at current tick.
-
-0: sunny, 1: rainy, 2: snowy, 3: sleet.
-
-holiday
-*******
-
-type: short
-slots: 1
-
-If it is holidy at current tick.
-
-0: holiday, 1: not holiday
-
-extra_cost
-**********
-
-type: int
-slots: 1
-
-Cost after we reach the capacity after executing action, we have to move extra bikes
-to other stations.
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| Field | Type | Slots | Description |
++===================+=======+========+===========================================================================================================+
+| bikes | int | 1 | How many bikes avaiable in current station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| shortage | int | 1 | Per tick state. Lack number of bikes in current station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| trip_requirement | int | 1 | Per tick states. How many requirements in current station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| fulfillment | int | 1 | How many requirement is fit in current station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| capacity | int | 1 | Max number of bikes this station can take. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| id | int | 1 | Id of current station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| weekday | short | 1 | Weekday at current tick. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| temperature | short | 1 | Temperature at current tick. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| weather | short | 1 | Weather at current tick. (0: sunny, 1: rainy, 2: snowy, 3: sleet) |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| holiday | short | 1 | If it is holidy at current tick. (0: holiday, 1: not holiday) |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| extra_cost | int | 1 | Cost after we reach the capacity after executing action, we have to move extra bikes to other stations. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| transfer_cost | int | 1 | Cost to execute action to transfer bikes to other station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| failed_return | int | 1 | Per tick state. How many bikes failed to return to current station. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
+| min_bikes | int | 1 | Min bikes number in a frame. |
++-------------------+-------+--------+-----------------------------------------------------------------------------------------------------------+
-transfer_cost
-*************
-
-type: int
-slots: 1
-
-Cost to execute action to transfer bikes to other station.
-
-failed_return
-*************
-
-type: int
-slots: 1
-
-Per tick state. How many bikes failed to return to current station.
-
-min_bikes
-*********
-
-type: int
-slots: 1
-
-Min bikes number in a frame.
matrices
++++++++
-trips_adj
-*********
-
-type: int
-slots: station number * station number
-
-Used to store trip requirement number between 2 stations.
++------------+-------+----------------------------------+------------------------------------------------------------+
+| Field | Type | Slots | Description |
++============+=======+==================================+============================================================+
+| trips_adj | int | station number * station number | Used to store trip requirement number between 2 stations. |
++------------+-------+----------------------------------+------------------------------------------------------------+
VM-scheduling
@@ -743,315 +505,121 @@ Nodes and attributes in scenario
Cluster
+++++++
-id
-***
-
-type: short
-slots: 1
-
-Id of the cluster.
-
-region_id
-*********
-
-type: short
-slots: 1
-
-Region is of current cluster.
-
-data_center_id
-**************
-
-type: short
-slots: 1
-
-Data center id of current cluster.
-
-total_machine_num
-******************
-
-type: int
-slots: 1
-
-Total number of machines in the cluster.
-
-empty_machine_num
-******************
-
-type: int
-slots: 1
-
-The number of empty machines in this cluster. A empty machine means that its allocated CPU cores are 0.
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
+| Field | Type | Slots | Description |
++====================+=======+========+==========================================================================================================+
+| id | short | 1 | Id of the cluster. |
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
+| region_id | short | 1 | Region id of current cluster. |
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
+| zond_id | short | 1 | Zone id of current cluster. |
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
+| data_center_id | short | 1 | Data center id of current cluster. |
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
+| total_machine_num | int | 1 | Total number of machines in the cluster. |
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
+| empty_machine_num | int | 1 | The number of empty machines in this cluster. A empty machine means that its allocated CPU cores are 0. |
++--------------------+-------+--------+----------------------------------------------------------------------------------------------------------+
data_centers
++++++++++++
-id
-***
-
-type: short
-slots: 1
-
-Id of current data center.
-
-region_id
-*********
-
-type: short
-slots: 1
-
-Region id of current data center.
-
-zone_id
-*******
-
-type: short
-slots: 1
-
-Zone id of current data center.
-
-total_machine_num
-*****************
-
-type: int
-slots: 1
-
-Total number of machine in current data center.
-
-empty_machine_num
-*****************
-
-type: int
-slots: 1
-
-The number of empty machines in current data center.
++--------------------+-------+--------+-------------------------------------------------------+
+| Field | Type | Slots | Description |
++====================+=======+========+=======================================================+
+| id | short | 1 | Id of current data center. |
++--------------------+-------+--------+-------------------------------------------------------+
+| region_id | short | 1 | Region id of current data center. |
++--------------------+-------+--------+-------------------------------------------------------+
+| zone_id | short | 1 | Zone id of current data center. |
++--------------------+-------+--------+-------------------------------------------------------+
+| total_machine_num | int | 1 | Total number of machine in current data center. |
++--------------------+-------+--------+-------------------------------------------------------+
+| empty_machine_num | int | 1 | The number of empty machines in current data center. |
++--------------------+-------+--------+-------------------------------------------------------+
pms
+++
Physical machine node.
-id
-***
-
-type: int
-slots: 1
-
-Id of current machine.
-
-cpu_cores_capacity
-******************
-
-type: short
-slots: 1
-
-Max number of cpu core can be used for current machine.
-
-memory_capacity
-***************
-
-type: short
-slots: 1
-
-Max number of memory can be used for current machine.
-
-pm_type
-*******
-
-type: short
-slots: 1
-
-Type of current machine.
-
-cpu_cores_allocated
-*******************
-
-type: short
-slots: 1
-
-How many cpu core is allocated.
-
-memory_allocated
-****************
-
-type: short
-slots: 1
-
-How many memory is allocated.
-
-cpu_utilization
-***************
-
-type: float
-slots: 1
-
-CPU utilization of current machine.
-
-energy_consumption
-******************
-
-type: float
-slots: 1
-
-Energy consumption of current machine.
-
-oversubscribable
-****************
-
-type: short
-slots: 1
-
-Physical machine type: non-oversubscribable is -1, empty: 0, oversubscribable is 1.
-
-region_id
-*********
-
-type: short
-slots: 1
-
-Region id of current machine.
-
-zone_id
-*******
-
-type: short
-slots: 1
-
-Zone id of current machine.
-
-data_center_id
-**************
-
-type: short
-slots: 1
-
-Data center id of current machine.
-
-cluster_id
-**********
-
-type: short
-slots: 1
-
-Cluster id of current machine.
-
-rack_id
-*******
-
-type: short
-slots: 1
-
-Rack id of current machine.
-
-Rack
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| Field | Type | Slots | Description |
++=====================+=======+========+=================================================================================+
+| id | int | 1 | Id of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| cpu_cores_capacity | short | 1 | Max number of cpu core can be used for current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| memory_capacity | short | 1 | Max number of memory can be used for current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| pm_type | short | 1 | Type of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| cpu_cores_allocated | short | 1 | How many cpu core is allocated. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| memory_allocated | short | 1 | How many memory is allocated. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| cpu_utilization | float | 1 | CPU utilization of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| energy_consumption | float | 1 | Energy consumption of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| oversubscribable | short | 1 | Physical machine non-oversubscribable is -1, empty: 0, oversubscribable is 1. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| region_id | short | 1 | Region id of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| zone_id | short | 1 | Zone id of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| data_center_id | short | 1 | Data center id of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| cluster_id | short | 1 | Cluster id of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+| rack_id | short | 1 | Rack id of current machine. |
++---------------------+-------+--------+---------------------------------------------------------------------------------+
+
+rack
+++++
-id
-***
-
-type: int
-slots: 1
-
-Id of current rack.
-
-region_id
-*********
-
-type: short
-slots: 1
-
-Region id of current rack.
-
-zone_id
-*******
-
-type: short
-slots: 1
-
-Zone id of current rack.
-
-data_center_id
-**************
-
-type: short
-slots: 1
-
-Data center id of current rack.
-
-cluster_id
-**********
-
-type: short
-slots: 1
-
-Cluster id of current rack.
-
-total_machine_num
-*****************
-
-type: int
-slots: 1
-
-Total number of machines on this rack.
-
-empty_machine_num
-*****************
-
-type: int
-slots: 1
-
-Number of machines that not in use on this rack.
++--------------------+-------+--------+---------------------------------------------------+
+| Field | Type | Slots | Description |
++====================+=======+========+===================================================+
+| id | int | 1 | Id of current rack. |
++--------------------+-------+--------+---------------------------------------------------+
+| region_id | short | 1 | Region id of current rack. |
++--------------------+-------+--------+---------------------------------------------------+
+| zone_id | short | 1 | Zone id of current rack. |
++--------------------+-------+--------+---------------------------------------------------+
+| data_center_id | short | 1 | Data center id of current rack. |
++--------------------+-------+--------+---------------------------------------------------+
+| cluster_id | short | 1 | Cluster id of current rack. |
++--------------------+-------+--------+---------------------------------------------------+
+| total_machine_num | int | 1 | Total number of machines on this rack. |
++--------------------+-------+--------+---------------------------------------------------+
+| empty_machine_num | int | 1 | Number of machines that not in use on this rack. |
++--------------------+-------+--------+---------------------------------------------------+
regions
+++++++
-id
-***
-
-type: short
-slots: 1
-
-Id of curent region.
-
-total_machine_num
-*****************
-
-type: int
-slots: 1
-
-Total number of machines in this region.
-
-empty_machine_num
-*****************
-
-type: int
-slots: 1
-
-Number of machines that not in use in this region.
++--------------------+-------+--------+------------------------------------------------------+
+| Field | Type | Slots | Description |
++====================+=======+========+======================================================+
+| id | short | 1 | Id of current region. |
++--------------------+-------+--------+------------------------------------------------------+
+| total_machine_num | int | 1 | Total number of machines in this region. |
++--------------------+-------+--------+------------------------------------------------------+
+| empty_machine_num | int | 1 | Number of machines that not in use in this region. |
++--------------------+-------+--------+------------------------------------------------------+
zones
+++++
-id
-***
-
-type: short
-slots: 1
-
-Id of this zone.
-
-total_machine_num
-*****************
-
-type: int
-slots: 1
-
-Total number of machines in this zone.
-
-empty_machine_num
-*****************
-
-type: int
-slots: 1
-
-Number of machines that not in use in this zone.
++--------------------+-------+--------+---------------------------------------------------+
+| Field | Type | Slots | Description |
++====================+=======+========+===================================================+
+| id | short | 1 | Id of this zone. |
++--------------------+-------+--------+---------------------------------------------------+
+| region_id          | short | 1      | Region id of current zone.                        |
++--------------------+-------+--------+---------------------------------------------------+
+| total_machine_num | int | 1 | Total number of machines in this zone. |
++--------------------+-------+--------+---------------------------------------------------+
+| empty_machine_num | int | 1 | Number of machines that not in use in this zone. |
++--------------------+-------+--------+---------------------------------------------------+
diff --git a/examples/requirements.ex.txt b/examples/requirements.ex.txt
index dbdc8c561..d920dbe28 100644
--- a/examples/requirements.ex.txt
+++ b/examples/requirements.ex.txt
@@ -1 +1,3 @@
-PuLP==2.1
+matplotlib>=3.1.2
+pulp>=2.1.0
+tweepy>=4.10.0
diff --git a/examples/vm_scheduling/offline_lp/launcher.py b/examples/vm_scheduling/offline_lp/launcher.py
index 2f83512ea..af1c193d2 100644
--- a/examples/vm_scheduling/offline_lp/launcher.py
+++ b/examples/vm_scheduling/offline_lp/launcher.py
@@ -12,7 +12,7 @@
from ilp_agent import IlpAgent
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import DecisionEvent
from maro.simulator.scenarios.vm_scheduling.common import Action
from maro.utils import LogFormat, Logger, convert_dottable
@@ -46,7 +46,7 @@
env.set_seed(config.env.seed)
metrics: object = None
- decision_event: DecisionPayload = None
+ decision_event: DecisionEvent = None
is_done: bool = False
action: Action = None
diff --git a/examples/vm_scheduling/rl/env_sampler.py b/examples/vm_scheduling/rl/env_sampler.py
index 8e7b89c97..19d6171a9 100644
--- a/examples/vm_scheduling/rl/env_sampler.py
+++ b/examples/vm_scheduling/rl/env_sampler.py
@@ -12,7 +12,7 @@
from maro.rl.rollout import AbsEnvSampler, CacheElement
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload, PostponeAction
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent, PostponeAction
from .config import (
num_features,
@@ -44,7 +44,7 @@ def __init__(self, learn_env: Env, test_env: Env) -> None:
def _get_global_and_agent_state_impl(
self,
- event: DecisionPayload,
+ event: DecisionEvent,
tick: int = None,
) -> Tuple[Union[None, np.ndarray, List[object]], Dict[Any, Union[np.ndarray, List[object]]]]:
pm_state, vm_state = self._get_pm_state(), self._get_vm_state(event)
@@ -71,14 +71,14 @@ def _get_global_and_agent_state_impl(
def _translate_to_env_action(
self,
action_dict: Dict[Any, Union[np.ndarray, List[object]]],
- event: DecisionPayload,
+ event: DecisionEvent,
) -> Dict[Any, object]:
if action_dict["AGENT"] == self.num_pms:
return {"AGENT": PostponeAction(vm_id=event.vm_id, postpone_step=1)}
else:
return {"AGENT": AllocateAction(vm_id=event.vm_id, pm_id=action_dict["AGENT"][0])}
- def _get_reward(self, env_action_dict: Dict[Any, object], event: DecisionPayload, tick: int) -> Dict[Any, float]:
+ def _get_reward(self, env_action_dict: Dict[Any, object], event: DecisionEvent, tick: int) -> Dict[Any, float]:
action = env_action_dict["AGENT"]
conf = reward_shaping_conf if self._env == self._learn_env else test_reward_shaping_conf
if isinstance(action, PostponeAction): # postponement
@@ -121,7 +121,7 @@ def _get_vm_state(self, event):
],
)
- def _get_allocation_reward(self, event: DecisionPayload, alpha: float, beta: float):
+ def _get_allocation_reward(self, event: DecisionEvent, alpha: float, beta: float):
vm_unit_price = self._env.business_engine._get_unit_price(
event.vm_cpu_cores_requirement,
event.vm_memory_requirement,
diff --git a/examples/vm_scheduling/rule_based_algorithm/agent.py b/examples/vm_scheduling/rule_based_algorithm/agent.py
index 7e75aeed1..34eb946a4 100644
--- a/examples/vm_scheduling/rule_based_algorithm/agent.py
+++ b/examples/vm_scheduling/rule_based_algorithm/agent.py
@@ -2,7 +2,7 @@
# Licensed under the MIT license.
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload, PostponeAction
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent, PostponeAction
from maro.simulator.scenarios.vm_scheduling.common import Action
@@ -10,7 +10,7 @@ class VMSchedulingAgent(object):
def __init__(self, algorithm):
self._algorithm = algorithm
- def choose_action(self, decision_event: DecisionPayload, env: Env) -> Action:
+ def choose_action(self, decision_event: DecisionEvent, env: Env) -> Action:
"""This method will determine whether to postpone the current VM or allocate a PM to the current VM."""
valid_pm_num: int = len(decision_event.valid_pms)
diff --git a/examples/vm_scheduling/rule_based_algorithm/best_fit.py b/examples/vm_scheduling/rule_based_algorithm/best_fit.py
index ceaefae57..e6bba17da 100644
--- a/examples/vm_scheduling/rule_based_algorithm/best_fit.py
+++ b/examples/vm_scheduling/rule_based_algorithm/best_fit.py
@@ -5,7 +5,7 @@
from rule_based_algorithm import RuleBasedAlgorithm
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent
class BestFit(RuleBasedAlgorithm):
@@ -13,7 +13,7 @@ def __init__(self, **kwargs):
super().__init__()
self._metric_type: str = kwargs["metric_type"]
- def allocate_vm(self, decision_event: DecisionPayload, env: Env) -> AllocateAction:
+ def allocate_vm(self, decision_event: DecisionEvent, env: Env) -> AllocateAction:
# Use a rule to choose a valid PM.
chosen_idx: int = self._pick_pm_func(decision_event, env)
# Take action to allocate on the chose PM.
diff --git a/examples/vm_scheduling/rule_based_algorithm/bin_packing.py b/examples/vm_scheduling/rule_based_algorithm/bin_packing.py
index 905929036..613ec2786 100644
--- a/examples/vm_scheduling/rule_based_algorithm/bin_packing.py
+++ b/examples/vm_scheduling/rule_based_algorithm/bin_packing.py
@@ -7,7 +7,7 @@
from rule_based_algorithm import RuleBasedAlgorithm
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent
class BinPacking(RuleBasedAlgorithm):
@@ -24,7 +24,7 @@ def _init_bin(self):
self._bins = [[] for _ in range(self._pm_cpu_core_num + 1)]
self._bin_size = [0] * (self._pm_cpu_core_num + 1)
- def allocate_vm(self, decision_event: DecisionPayload, env: Env) -> AllocateAction:
+ def allocate_vm(self, decision_event: DecisionEvent, env: Env) -> AllocateAction:
# Initialize the bin.
self._init_bin()
diff --git a/examples/vm_scheduling/rule_based_algorithm/first_fit.py b/examples/vm_scheduling/rule_based_algorithm/first_fit.py
index ec4b678eb..073105195 100644
--- a/examples/vm_scheduling/rule_based_algorithm/first_fit.py
+++ b/examples/vm_scheduling/rule_based_algorithm/first_fit.py
@@ -4,14 +4,14 @@
from rule_based_algorithm import RuleBasedAlgorithm
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent
class FirstFit(RuleBasedAlgorithm):
def __init__(self, **kwargs):
super().__init__()
- def allocate_vm(self, decision_event: DecisionPayload, env: Env) -> AllocateAction:
+ def allocate_vm(self, decision_event: DecisionEvent, env: Env) -> AllocateAction:
# Use a valid PM based on its order.
chosen_idx: int = decision_event.valid_pms[0]
# Take action to allocate on the chose PM.
diff --git a/examples/vm_scheduling/rule_based_algorithm/random_pick.py b/examples/vm_scheduling/rule_based_algorithm/random_pick.py
index 88fcd51e8..a20449d74 100644
--- a/examples/vm_scheduling/rule_based_algorithm/random_pick.py
+++ b/examples/vm_scheduling/rule_based_algorithm/random_pick.py
@@ -6,14 +6,14 @@
from rule_based_algorithm import RuleBasedAlgorithm
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent
class RandomPick(RuleBasedAlgorithm):
def __init__(self, **kwargs):
super().__init__()
- def allocate_vm(self, decision_event: DecisionPayload, env: Env) -> AllocateAction:
+ def allocate_vm(self, decision_event: DecisionEvent, env: Env) -> AllocateAction:
valid_pm_num: int = len(decision_event.valid_pms)
# Random choose a valid PM.
chosen_idx: int = random.randint(0, valid_pm_num - 1)
diff --git a/examples/vm_scheduling/rule_based_algorithm/round_robin.py b/examples/vm_scheduling/rule_based_algorithm/round_robin.py
index 5af7df23d..009b6c9c4 100644
--- a/examples/vm_scheduling/rule_based_algorithm/round_robin.py
+++ b/examples/vm_scheduling/rule_based_algorithm/round_robin.py
@@ -4,7 +4,7 @@
from rule_based_algorithm import RuleBasedAlgorithm
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent
class RoundRobin(RuleBasedAlgorithm):
@@ -15,7 +15,7 @@ def __init__(self, **kwargs):
kwargs["env"].snapshot_list["pms"][kwargs["env"].frame_index :: ["cpu_cores_capacity"]].shape[0]
)
- def allocate_vm(self, decision_event: DecisionPayload, env: Env) -> AllocateAction:
+ def allocate_vm(self, decision_event: DecisionEvent, env: Env) -> AllocateAction:
# Choose the valid PM which index is next to the previous chose PM's index
chosen_idx: int = (self._prev_idx + 1) % self._pm_num
while chosen_idx not in decision_event.valid_pms:
diff --git a/examples/vm_scheduling/rule_based_algorithm/rule_based_algorithm.py b/examples/vm_scheduling/rule_based_algorithm/rule_based_algorithm.py
index cb78f6563..021f0e1b1 100644
--- a/examples/vm_scheduling/rule_based_algorithm/rule_based_algorithm.py
+++ b/examples/vm_scheduling/rule_based_algorithm/rule_based_algorithm.py
@@ -4,13 +4,13 @@
import abc
from maro.simulator import Env
-from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionPayload
+from maro.simulator.scenarios.vm_scheduling import AllocateAction, DecisionEvent
class RuleBasedAlgorithm(object):
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
- def allocate_vm(self, decision_event: DecisionPayload, env: Env) -> AllocateAction:
+ def allocate_vm(self, decision_event: DecisionEvent, env: Env) -> AllocateAction:
"""This method will determine allocate which PM to the current VM."""
raise NotImplementedError
diff --git a/maro/cli/inspector/cim_dashboard.py b/maro/cli/inspector/cim_dashboard.py
index 11c8564d8..02e24c57c 100644
--- a/maro/cli/inspector/cim_dashboard.py
+++ b/maro/cli/inspector/cim_dashboard.py
@@ -25,7 +25,7 @@ def start_cim_dashboard(source_path: str, epoch_num: int, prefix: str):
--ports.csv: Record ports' attributes in this file.
--vessel.csv: Record vessels' attributes in this file.
--matrices.csv: Record transfer volume information in this file.
- ………………
+ ......
--epoch_{epoch_num-1}
--manifest.yml: Record basic info like scenario name, name of index_name_mapping file.
--config.yml: Record the relationship between ports' index and name.
diff --git a/maro/cli/inspector/citi_bike_dashboard.py b/maro/cli/inspector/citi_bike_dashboard.py
index b60eb693b..15be35c43 100644
--- a/maro/cli/inspector/citi_bike_dashboard.py
+++ b/maro/cli/inspector/citi_bike_dashboard.py
@@ -24,7 +24,7 @@ def start_citi_bike_dashboard(source_path: str, epoch_num: int, prefix: str):
--stations.csv: Record stations' attributes in this file.
--matrices.csv: Record transfer volume information in this file.
--stations_summary.csv: Record the summary data of current epoch.
- ………………
+ ......
--epoch_{epoch_num-1}
--manifest.yml: Record basic info like scenario name, name of index_name_mapping file.
--full_stations.json: Record the relationship between ports' index and name.
diff --git a/maro/cli/inspector/env_data_process.py b/maro/cli/inspector/env_data_process.py
index 2e8cb0469..bd2b57a09 100644
--- a/maro/cli/inspector/env_data_process.py
+++ b/maro/cli/inspector/env_data_process.py
@@ -28,7 +28,7 @@ def start_vis(source_path: str, force: str, **kwargs: dict):
-input_file_folder_path
--epoch_0 : Data of current epoch.
--holder_info.csv: Attributes of current epoch.
- ………………
+ ......
--epoch_{epoch_num-1}
--manifest.yml: Record basic info like scenario name, name of index_name_mapping file.
--index_name_mapping file: Record the relationship between an index and its name.
diff --git a/maro/common.py b/maro/common.py
new file mode 100644
index 000000000..d3bfa7d40
--- /dev/null
+++ b/maro/common.py
@@ -0,0 +1,21 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+
+class BaseDecisionEvent:
+ """Base class for all decision events.
+
+ We made this design for the convenience of users. As a summary, there are two types of events in MARO:
+ - CascadeEvent & AtomEvent: used to drive the MARO Env / business engine.
+ - DecisionEvent: exposed to users as a means of communication.
+
+    The latter serves as the `payload` of the former inside the MARO Env.
+
+    Therefore, the related naming might be a little tricky.
+    - Inside the MARO Env: `decision_event` is actually a CascadeEvent; the DecisionEvent is its payload.
+    - Outside the MARO Env (for users): `decision_event` is a DecisionEvent.
+ """
+
+
+class BaseAction:
+ """Base class for all action payloads"""
diff --git a/maro/event_buffer/event_buffer.py b/maro/event_buffer/event_buffer.py
index 88de2517e..8b7525d46 100644
--- a/maro/event_buffer/event_buffer.py
+++ b/maro/event_buffer/event_buffer.py
@@ -4,8 +4,9 @@
import csv
from collections import defaultdict
-from typing import Callable, List, Optional
+from typing import Callable, List, Optional, cast
+from ..common import BaseAction, BaseDecisionEvent
from .event import ActualEvent, AtomEvent, CascadeEvent
from .event_linked_list import EventLinkedList
from .event_pool import EventPool
@@ -122,9 +123,7 @@ def gen_atom_event(self, tick: int, event_type: object, payload: object = None)
Returns:
AtomEvent: Atom event object
"""
- event = self._event_pool.gen(tick, event_type, payload, False)
- assert isinstance(event, AtomEvent)
- return event
+ return cast(AtomEvent, self._event_pool.gen(tick, event_type, payload, is_cascade=False))
def gen_cascade_event(self, tick: int, event_type: object, payload: object) -> CascadeEvent:
"""Generate an cascade event that used to hold immediate events that
@@ -138,31 +137,32 @@ def gen_cascade_event(self, tick: int, event_type: object, payload: object) -> C
Returns:
CascadeEvent: Cascade event object.
"""
- event = self._event_pool.gen(tick, event_type, payload, True)
- assert isinstance(event, CascadeEvent)
- return event
+ return cast(CascadeEvent, self._event_pool.gen(tick, event_type, payload, is_cascade=True))
- def gen_decision_event(self, tick: int, payload: object) -> CascadeEvent:
+ def gen_decision_event(self, tick: int, payload: BaseDecisionEvent) -> CascadeEvent:
"""Generate a decision event that will stop current simulation, and ask agent for action.
Args:
tick (int): Tick that the event will be processed.
- payload (object): Payload of event, used to pass data to handlers.
+ payload (BaseDecisionEvent): Payload of event, used to pass data to handlers.
Returns:
CascadeEvent: Event object
"""
+ assert isinstance(payload, BaseDecisionEvent)
return self.gen_cascade_event(tick, MaroEvents.PENDING_DECISION, payload)
- def gen_action_event(self, tick: int, payload: object) -> CascadeEvent:
+ def gen_action_event(self, tick: int, payloads: List[BaseAction]) -> CascadeEvent:
"""Generate an event that used to dispatch action to business engine.
Args:
tick (int): Tick that the event will be processed.
- payload (object): Payload of event, used to pass data to handlers.
+ payloads (List[BaseAction]): Payloads of event, used to pass data to handlers.
Returns:
CascadeEvent: Event object
"""
- return self.gen_cascade_event(tick, MaroEvents.TAKE_ACTION, payload)
+ assert isinstance(payloads, list)
+ assert all(isinstance(p, BaseAction) for p in payloads)
+ return self.gen_cascade_event(tick, MaroEvents.TAKE_ACTION, payloads)
def register_event_handler(self, event_type: object, handler: Callable) -> None:
"""Register an event with handler, when there is an event need to be processed,
diff --git a/maro/requirements.build.txt b/maro/requirements.build.txt
index d689fd287..b893b4fdb 100644
--- a/maro/requirements.build.txt
+++ b/maro/requirements.build.txt
@@ -1,6 +1,6 @@
-pyjwt
-numpy<1.20.0
-Cython>=0.29.14
+PyJWT>=2.4.0
+numpy>=1.19.0
+cython>=0.29.14
altair>=4.1.0
streamlit>=0.69.1
tqdm>=4.51.0
diff --git a/maro/rl/doc/overview_policy.svg b/maro/rl/doc/overview_policy.svg
new file mode 100644
index 000000000..679b3458d
--- /dev/null
+++ b/maro/rl/doc/overview_policy.svg
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/maro/rl/doc/overview_rlworkflow.svg b/maro/rl/doc/overview_rlworkflow.svg
new file mode 100644
index 000000000..a82103381
--- /dev/null
+++ b/maro/rl/doc/overview_rlworkflow.svg
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/maro/rl/doc/overview_sampler.svg b/maro/rl/doc/overview_sampler.svg
new file mode 100644
index 000000000..7cd991fbc
--- /dev/null
+++ b/maro/rl/doc/overview_sampler.svg
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/maro/rl/doc/overview_trainingmanager.svg b/maro/rl/doc/overview_trainingmanager.svg
new file mode 100644
index 000000000..f2a2ef213
--- /dev/null
+++ b/maro/rl/doc/overview_trainingmanager.svg
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/maro/rl/doc/rl_development_doc.md b/maro/rl/doc/rl_development_doc.md
new file mode 100644
index 000000000..ef1a66336
--- /dev/null
+++ b/maro/rl/doc/rl_development_doc.md
@@ -0,0 +1,122 @@
+# MARO RL
+
+## What's MARO RL
+
+The MARO reinforcement learning toolkit (MARO RL) is used to build reinforcement learning workflows. It consists of three major parts:
+- Policy: the policies used by agents.
+- Sampler: schedules the interaction between agents and the environment and collects the resulting experiences.
+- Training manager: used to train policies.
+
+The figure below shows the overall structure. Next, we explain the roles and functions of the different components one by one.
+
+
+
+### What's Policy
+
+Policy is a very important part of RL. Concrete policies are defined for different scenarios and are associated with agents. The figure shows the structure of the policy part. Many deep learning frameworks treat the agent and the policy as a single object, but this is not the case in MARO: the highest-level abstraction of a policy object is `AbsPolicy`, and the Policy Gradient policy in the figure can be understood as the final, concrete model, i.e. a subclass that inherits from `AbsPolicy`. Some important interfaces are involved here:
+
+- `policy_net`: the base class for all core networks in a policy. Two interfaces, `state_dim` and `action_dim`, are defined here for the state and action dimensions. In addition there is `step`, which runs a training step to update the network parameters according to a given loss, and `get_gradients`, which returns the gradients of all parameters for a given loss, etc.
+- `MyActorNet`: since the policy net is generic, algorithm-specific details have to be filled in for each algorithm. This class implements those details for the corresponding algorithm, for example `get_action_probs_impl`.
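+
+To make this concrete, below is a minimal sketch of such an actor network. The base class, its import path, and the constructor signature are assumptions made for illustration only; the real `MyActorNet` lives in the CIM RL example.
+
+```python
+# Minimal sketch only. The MARO base class and its exact signature are assumptions;
+# see the CIM RL example in this repository for the real implementation.
+import torch
+
+from maro.rl.model import DiscretePolicyNet  # assumed import path
+
+
+class MyActorNet(DiscretePolicyNet):  # illustrative subclass
+    def __init__(self, state_dim: int, action_num: int) -> None:
+        super().__init__(state_dim=state_dim, action_num=action_num)
+        self._net = torch.nn.Sequential(
+            torch.nn.Linear(state_dim, 64),
+            torch.nn.ReLU(),
+            torch.nn.Linear(64, action_num),
+        )
+
+    def _get_action_probs_impl(self, states: torch.Tensor) -> torch.Tensor:
+        # The algorithm-specific detail mentioned above: map states to action probabilities.
+        return torch.softmax(self._net(states.float()), dim=1)
+```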
+
+Among the policies there is a predefined, simplest one, `RuleBasedPolicy`, which is mainly used in simple scenarios. Complex scenarios, however, require interactive training between the policy and the environment to obtain the optimal policy, so we define an `RLPolicy`.
+
+In MARO's design, a policy cannot train itself; it needs to be trained collaboratively through external training components. Therefore, in addition to the interfaces that return actions for a given state, there is also a set of interfaces related to training:
+- `get_actions`: obtains the actions corresponding to a given state; specific algorithms derive several similar functions from it, such as `get_actions_with_probs`, `get_actions_with_logps`, etc.
+- `get_gradients`: Gradients for all parameters can be obtained with a given loss (generic algorithm interface).
+- `apply_gradients`: Apply gradients to the network to update all parameters.
+
+These interfaces are called by the TrainingManager for training. Currently, MARO assumes that all RL policies are based on deep learning models, so the training-related interfaces are designed specifically for gradient descent.
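+
+As a rough illustration of how an external component drives these interfaces, here is a small sketch; everything except the three interfaces above (`get_actions`, `get_gradients`, `apply_gradients`) is a hypothetical placeholder.
+
+```python
+# Rough illustration only: how an external trainer could drive an RLPolicy through the
+# training-related interfaces above. `policy`, `compute_loss` and `batch` are placeholders.
+def external_update_step(policy, compute_loss, batch):
+    actions = policy.get_actions(batch["states"])   # act according to the current policy
+    loss = compute_loss(batch, actions)             # algorithm-specific loss
+    gradients = policy.get_gradients(loss)          # gradients for all parameters
+    policy.apply_gradients(gradients)               # update the network in place
+```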
+
+
+
+### What's Sampler
+
+Next comes the second component in RL, the Sampler. It plays an important scheduling role: it makes the policy interact with the environment and collects the states and experiences returned by the environment. As shown in the figure, this part is composed of the environment simulator, the policy, and the experience-collection components. Unlike a policy, a sampler is an entity that inherits from the base class `AbsEnvSampler`, which mainly defines the basic data collector and policy evaluator.
+
+
+
+
+In the Sampler, the system tick (i.e. the timestamp of the system) keeps flowing into this component; the policy issues the relevant actions to the environment simulator at specific ticks, and the information-collecting component then gathers the experiences and states that come back.
+
+For this step, the base class `AbsEnvSampler` is defined in advance. It contains some important functions through which the Sampler works. Let's list a few of them:
+- `get_global_and_agent_state`: used to obtain the global state and the per-agent states.
+- `get_reward`: gets the rewards based on the executed env actions.
+- `translate_to_env_action`: converts the actions generated by the model into objects that the environment can execute. These functions only serve as interfaces in `AbsEnvSampler`; the concrete implementations are overridden by the sampler for a specific environment (mentioned later in this section).
+
+In MARO's design, since policy and agent are not a single entity but are only linked through an index-like mapping, a separate management component is required when using them. Here, `AbsAgentWrapper` is used for this purpose; it manages agents and policies during experience gathering through functions such as:
+- `set_policy_state`: used to directly set the state of the policies.
+- `choose_actions`: selects the corresponding actions according to the agents' states.
+
+After explaining the basic functions of `AbsEnvSampler` and the `AbsAgentWrapper` used for management, we need an intermediate scheduling component to run them together. This brings us to another function of `AbsEnvSampler`:
+
+- `sample`: the scheduling function defined in `AbsEnvSampler`. It strings the interfaces above together, drives the interaction between the environment and the policy, and then passes the returned information on through `post_collect`.
+
+Some of the functions mentioned above are overridden by the sampler for a specific environment. The following code shows the basic construction of a sampler; a concrete env sampler needs to be declared and used (here, `CIMEnvSampler` is used as an example).
+
+```python
+env_sampler = CIMEnvSampler(
+    learn_env=learn_env,
+    test_env=test_env,
+    policies=policies,
+    agent2policy=agent2policy,
+    reward_eval_delay=reward_shaping_conf["time_window"],
+)
+```
+
+`CIMEnvSampler` is a predefined sampler in MARO's design, alongside `VMEnvSampler`. They are scenario-specific samplers in which functions such as `_get_global_and_agent_state_impl`, `_translate_to_env_action` and `_get_reward` are overridden so that they behave appropriately for the corresponding scenario.
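+
+Below is a simplified sketch of what such a scenario-specific sampler looks like. The hook names follow the `vm_scheduling` example in this repository, while the class itself and the omitted bodies are purely illustrative.
+
+```python
+# Simplified sketch of a scenario-specific sampler. The hook names follow the
+# vm_scheduling example; the class and the omitted bodies are illustrative only.
+from typing import Any, Dict
+
+from maro.rl.rollout import AbsEnvSampler
+
+
+class MyEnvSampler(AbsEnvSampler):  # hypothetical scenario sampler
+    def _get_global_and_agent_state_impl(self, event, tick: int = None):
+        """Build the global state and the per-agent states from the decision event."""
+        ...
+
+    def _translate_to_env_action(self, action_dict: Dict[Any, Any], event) -> Dict[Any, object]:
+        """Convert model outputs into Action objects that the business engine can execute."""
+        ...
+
+    def _get_reward(self, env_action_dict: Dict[Any, object], event, tick: int) -> Dict[Any, float]:
+        """Compute per-agent rewards for the executed env actions."""
+        ...
+```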
+
+
+### What's Training Manager
+
+As mentioned above, a policy cannot update itself in MARO; it must be updated by external training algorithms, which is where the Training Manager comes in. The figure shows the workflow: the experiences provided by the Sampler are collected, passed to the training scheduler, and then pushed to the trainers to train and update the policies. We describe the composition of this component in detail below.
+
+
+
+
+#### Training Scheduler
+
+The first component to discuss is the TrainingScheduler, an important scheduler inside the TrainingManager. It contains a reward-handling component that is responsible for collecting the experiences passed in from the previous part; the main function involved is `record_experience()`. Once the experiences are collected, the scheduler calls `train_step()` to send them to the core Trainer for training.
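+
+Conceptually, the resulting loop looks roughly like the sketch below; the object and method names here are illustrative placeholders rather than MARO's exact API.
+
+```python
+# Conceptual sketch only; the method names used here are illustrative placeholders.
+def run_training(env_sampler, training_manager, num_episodes: int) -> None:
+    for _ in range(num_episodes):
+        result = env_sampler.sample()                                # collect experiences from the environment
+        training_manager.record_experiences(result["experiences"])  # hand them to the scheduler / trainers
+        training_manager.train_step()                                # each trainer updates its policies
+```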
+
+#### Trainer
+
+After the Training Scheduler, we move on to the Trainer. For any algorithm, we define the corresponding Params and Ops parts at design time. The base class of the Params part is `TrainerParams`, which holds the basic trainer parameters, among them `replay_memory_capacity` for the replay memory, `batch_size` for the batch size of each training step, `data_parallelism` for data parallelism, and `reward_discount` for reward decay. These parameters are passed directly into the core algorithm.
+
+In addition to the parameters, another key piece is the Ops, the minimal unit for training a policy. It is mainly responsible for loss/gradient calculation and policy updates, and each ops instance is used to train a single policy. All gradient-related calculation logic is implemented in the ops.
+
+The last part of the Trainer is the algorithm itself, which is the core of training and combines the two components above. In MARO's design, the algorithm part provides a base class named `AbsTrainer`, which defines the interface of a basic training strategy. Three of its interfaces are worth highlighting:
+- `build`: Creates the ops and the replay memory required for training; it should be called before training starts.
+- `train_step`: Runs one training step that updates all policies this trainer is responsible for.
+- `record_multiple`: Records rollout experiences from an environment into the replay memory.
+
+Based on the design of `AbsTrainer`, two modes are defined for the correspondence between policies and training algorithms:
+- `SingleAgentTrainer`: A trainer that trains only one policy; it can be understood as holding a single ops.
+- `MultiAgentTrainer`: In contrast, a trainer that trains multiple policies. It may contain multiple ops, which are responsible for training the policies and other auxiliary objects.
+
+On top of these two modes, MARO provides several core algorithms, including:
+- `ActorCriticTrainer`: Actor-Critic algorithm for generating policies for discrete actions.
+- `DDPGTrainer`: DDPG algorithm for generating policies for continuous actions.
+- `DQNTrainer`: DQN algorithm for generating policies for discrete actions.
+- `DiscreteMADDPGTrainer`: MADDPG algorithm for generating policies for discrete actions.
+- `PPOTrainer`: PPO algorithm for generating policies for discrete actions.
+- `SoftActorCriticTrainer`: Soft Actor-Critic algorithm for generating policies for discrete actions.
+
+### RL Component Bundle
+
+This concludes the introduction of the RL components. Since these three components interact with one another, a resource management layer is provided to tie them together.
+
+The RL Component Bundle is that resource management component of the RL part. It exposes interfaces that link the defined Sampler, the TrainingManager, and the workflow. As shown in the following program, an `RLComponentBundle` defines a collection that contains `env_sampler`, `policies`, and `trainers`, i.e. the parts introduced above.
+
+```python
+rl_component_bundle = RLComponentBundle(
+    env_sampler=CIMEnvSampler(
+        learn_env=learn_env,
+        test_env=test_env,
+        policies=policies,
+        agent2policy=agent2policy,
+        reward_eval_delay=reward_shaping_conf["time_window"],
+    ),
+    agent2policy=agent2policy,
+    policies=policies,
+    trainers=trainers,
+)
+```
diff --git a/maro/rl/model/fc_block.py b/maro/rl/model/fc_block.py
index acd7bd16e..a323dedb0 100644
--- a/maro/rl/model/fc_block.py
+++ b/maro/rl/model/fc_block.py
@@ -84,7 +84,7 @@ def __init__(
self._name = name
def forward(self, x: torch.Tensor) -> torch.Tensor:
- out = self._net(x)
+ out = self._net(x.float())
if self._skip_connection:
out += x
return self._softmax(out) if self._softmax else out
diff --git a/maro/simulator/core.py b/maro/simulator/core.py
index 5a9524e98..8e524bf74 100644
--- a/maro/simulator/core.py
+++ b/maro/simulator/core.py
@@ -1,10 +1,9 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
-from collections.abc import Iterable
from importlib import import_module
from inspect import getmembers, isclass
-from typing import Generator, List, Optional, Tuple
+from typing import Generator, List, Optional, Tuple, Union, cast
from maro.backends.frame import FrameBase, SnapshotList
from maro.data_lib.dump_csv_converter import DumpConverter
@@ -12,6 +11,7 @@
from maro.streamit import streamit
from maro.utils.exception.simulator_exception import BusinessEngineNotFoundError
+from ..common import BaseAction, BaseDecisionEvent
from .abs_core import AbsEnv, DecisionMode
from .scenarios.abs_business_engine import AbsBusinessEngine
from .utils.common import tick_to_frame_index
@@ -73,8 +73,8 @@ def __init__(
self._event_buffer = EventBuffer(disable_finished_events, record_finished_events, record_file_path)
- # decision_events array for dump.
- self._decision_events = []
+ # decision_payloads array for dump.
+ self._decision_payloads = []
# The generator used to push the simulator forward.
self._simulate_generator = self._simulate()
@@ -89,21 +89,48 @@ def __init__(
self._streamit_episode = 0
- def step(self, action) -> Tuple[Optional[dict], Optional[List[object]], Optional[bool]]:
+ def step(
+ self,
+ action: Union[BaseAction, List[BaseAction], None] = None,
+ ) -> Tuple[Optional[dict], Union[BaseDecisionEvent, List[BaseDecisionEvent], None], bool]:
"""Push the environment to next step with action.
+ Under Sequential mode:
+ - If `action` is None, an empty list will be assigned to the decision event.
+ - Otherwise, the action(s) will be assigned to the decision event.
+
+ Under Joint mode:
+ - If `action` is None, no actions will be assigned to any decision event.
+ - If `action` is a single action, it will be assigned to the first decision event.
+ - If `action` is a list, actions are assigned to each decision event in order. If the number of actions
+ is less than the number of decision events, extra decision events will not be assigned actions. If
+          the number of actions is larger than the number of decision events, extra actions will be ignored.
+ If you want to assign multiple actions to specific event(s), please explicitly pass a list of list. For
+ example:
+
+ ```
+ env.step(action=[[a1, a2], a3, [a4, a5]])
+ ```
+
+ Will assign `a1` & `a2` to the first decision event, `a3` to the second decision event, and `a4` & `a5`
+ to the third decision event.
+
+ Particularly, if you only want to assign multiple actions to the first decision event, please
+        pass `[[a1, a2, ..., an]]` (a list of one list) instead of `[a1, a2, ..., an]` (a 1D list of n elements),
+ since the latter one will assign the n actions to the first n decision events.
+
Args:
- action (Action): Action(s) from agent.
+ action (Union[BaseAction, List[BaseAction], None]): Action(s) from agent.
Returns:
tuple: a tuple of (metrics, decision event, is_done).
"""
try:
- metrics, decision_event, _is_done = self._simulate_generator.send(action)
+ metrics, decision_payloads, _is_done = self._simulate_generator.send(action)
except StopIteration:
return None, None, True
- return metrics, decision_event, _is_done
+ return metrics, decision_payloads, _is_done
def dump(self) -> None:
"""Dump environment for restore.
@@ -131,10 +158,14 @@ def reset(self, keep_seed: bool = False) -> None:
self._business_engine.frame.dump(dump_folder)
self._converter.start_processing(self.configs)
- self._converter.dump_descsion_events(self._decision_events, self._start_tick, self._snapshot_resolution)
+ self._converter.dump_descsion_events(
+ self._decision_payloads,
+ self._start_tick,
+ self._snapshot_resolution,
+ )
self._business_engine.dump(dump_folder)
- self._decision_events.clear()
+ self._decision_payloads.clear()
self._business_engine.reset(keep_seed)
@@ -267,7 +298,29 @@ def _init_business_engine(self) -> None:
additional_options=self._additional_options,
)
- def _simulate(self) -> Generator[Tuple[dict, List[object], bool], object, None]:
+ def _assign_action(
+ self,
+ action: Union[BaseAction, List[BaseAction], None],
+ decision_event: CascadeEvent,
+ ) -> None:
+ decision_event.state = EventState.EXECUTING
+
+ if action is None:
+ actions = []
+ elif not isinstance(action, list):
+ actions = [action]
+ else:
+ actions = action
+
+ decision_event.add_immediate_event(self._event_buffer.gen_action_event(self._tick, actions), is_head=True)
+
+ def _simulate(
+ self,
+ ) -> Generator[
+ Tuple[dict, Union[BaseDecisionEvent, List[BaseDecisionEvent]], bool],
+ Union[BaseAction, List[BaseAction], None],
+ None,
+ ]:
"""This is the generator to wrap each episode process."""
self._streamit_episode += 1
@@ -282,7 +335,7 @@ def _simulate(self) -> Generator[Tuple[dict, List[object], bool], object, None]:
while True:
# Keep processing events, until no more events in this tick.
- pending_events = self._event_buffer.execute(self._tick)
+ pending_events = cast(List[CascadeEvent], self._event_buffer.execute(self._tick))
if len(pending_events) == 0:
# We have processed all the event of current tick, lets go for next tick.
@@ -292,50 +345,25 @@ def _simulate(self) -> Generator[Tuple[dict, List[object], bool], object, None]:
self._business_engine.frame.take_snapshot(self.frame_index)
# Append source event id to decision events, to support sequential action in joint mode.
- decision_events = [event.payload for event in pending_events]
-
- decision_events = (
- decision_events[0] if self._decision_mode == DecisionMode.Sequential else decision_events
- )
-
- # Yield current state first, and waiting for action.
- actions = yield self._business_engine.get_metrics(), decision_events, False
- # archive decision events.
- self._decision_events.append(decision_events)
-
- if actions is None:
- # Make business engine easy to work.
- actions = []
- elif not isinstance(actions, Iterable):
- actions = [actions]
+ decision_payloads = [event.payload for event in pending_events]
if self._decision_mode == DecisionMode.Sequential:
- # Generate a new atom event first.
- action_event = self._event_buffer.gen_action_event(self._tick, actions)
-
- # NOTE: decision event always be a CascadeEvent
- # We just append the action into sub event of first pending cascade event.
- event = pending_events[0]
- assert isinstance(event, CascadeEvent)
- event.state = EventState.EXECUTING
- event.add_immediate_event(action_event, is_head=True)
+ self._decision_payloads.append(decision_payloads[0])
+ action = yield self._business_engine.get_metrics(), decision_payloads[0], False
+ self._assign_action(action, pending_events[0])
else:
- # For joint mode, we will assign actions from beginning to end.
- # Then mark others pending events to finished if not sequential action mode.
- for i, pending_event in enumerate(pending_events):
- if i >= len(actions):
- if self._decision_mode == DecisionMode.Joint:
- # Ignore following pending events that have no action matched.
- pending_event.state = EventState.FINISHED
- else:
- # Set the state as executing, so event buffer will not pop them again.
- # Then insert the action to it.
- action = actions[i]
- pending_event.state = EventState.EXECUTING
- action_event = self._event_buffer.gen_action_event(self._tick, action)
-
- assert isinstance(pending_event, CascadeEvent)
- pending_event.add_immediate_event(action_event, is_head=True)
+ self._decision_payloads += decision_payloads
+ actions = yield self._business_engine.get_metrics(), decision_payloads, False
+ if actions is None:
+ actions = []
+ assert isinstance(actions, list)
+
+ for action, event in zip(actions, pending_events):
+ self._assign_action(action, event)
+
+ if self._decision_mode == DecisionMode.Joint:
+ for event in pending_events[len(actions) :]:
+ event.state = EventState.FINISHED
# Check the end tick of the simulation to decide if we should end the simulation.
is_end_tick = self._business_engine.post_step(self._tick)
diff --git a/maro/simulator/doc/overview_event.svg b/maro/simulator/doc/overview_event.svg
new file mode 100644
index 000000000..65ccb08a2
--- /dev/null
+++ b/maro/simulator/doc/overview_event.svg
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/maro/simulator/doc/overview_overview.svg b/maro/simulator/doc/overview_overview.svg
new file mode 100644
index 000000000..b244c6bb4
--- /dev/null
+++ b/maro/simulator/doc/overview_overview.svg
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/maro/simulator/doc/simulator_development_doc.md b/maro/simulator/doc/simulator_development_doc.md
new file mode 100644
index 000000000..667668a81
--- /dev/null
+++ b/maro/simulator/doc/simulator_development_doc.md
@@ -0,0 +1,86 @@
+# MARO simulator
+
+MARO's simulator is the runtime environment of MARO. Its high-level structure is shown in the following figure. We give a brief introduction to the entire structure in a top-down fashion.
+
+
+
+## `Env`
+
+In MARO, `Env` (short for environment) is one of the most important components. It is the outermost layer of the runtime environment, but it is decoupled from any specific scenario. Instead, it only provides a general interface to the scenario, so you need to create a dedicated `Env` instance for the specific scenario you want to run.
+
+When creating an `Env` instance, you need to specify `scenario`, `topology`, and other required parameters (such as `start_tick`, `durations`, and so on). For example:
+
+```python
+env = Env(scenario="cim", topology="global_trade.22p_l0.1", start_tick=0, durations=100)
+```
+
+The key method of `Env` is `step()`, which pushes the environment forward to the next decision point. Here is a very simple example of how to interact with `Env`:
+
+```python
+metrics, decision_event, is_done = env.step(None)
+```
+
+When an `Env` instance is created, an instance of a scenario-specific BusinessEngine (BE for short) is created internally through the `_init_business_engine()` method. The created `AbsBusinessEngine` instance interacts with the `Env` instance through the `_simulate()` method, which is implemented as a Python generator and drives the simulation scheduling. For the overall program, it acts as a channel linking the BE and the external decision makers, through which information is exchanged in both directions.
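+
+Putting these pieces together, a typical interaction loop looks like this (the decision logic is a placeholder):
+
+```python
+metrics, decision_event, is_done = env.step(None)
+
+while not is_done:
+    # Decide an action for the current decision event (placeholder logic).
+    action = my_policy(decision_event)
+    metrics, decision_event, is_done = env.step(action)
+```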
+
+## `Event Buffer`
+
+`EventBuffer` implements the mutual communication between `Env` and BE. In MARO, there are two different types of events:
+
+* `AtomEvent`: Atomic event, i.e. a basic event without any additional functions or properties.
+* `CascadeEvent`: Events with child events. Sometimes (actually in most cases) there are dependencies between events, i.e., an event should be immediately executed right after another event is finished. This is when `CascadeEvent` is used: a `CascadeEvent` has a queue of sub-events. After the cascade event is executed, its sub-events will be immediately executed in order. A sub-event could also have sub-events, through which we can form a hierarchical structure.
+
+There are two necessary parameters for any type of event:
+
+* `payload`: user-defined payload that contains the actual contents of the event.
+* `event_type`: user-defined specific type of event, used to categorize the event.
+
+You can define your own event types using any data type. In MARO, there are two pre-defined event types:
+
+* `PENDING_DECISION`: used for decision events.
+* `TAKE_ACTION`: used for action events.
+
+When the BE needs an action from outside, it generates a decision event and throws it to `Env` (through the `EventBuffer`). `Env` processes the decision event, passes it out, and waits for the agents to respond. Once it receives the actions from the agents, it generates an action event that wraps the actions and passes that event back to the BE (again through the `EventBuffer`). This is the general picture of how `Env` and BE communicate with each other; we will come back to this topic in more detail later.
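+
+The `EventBuffer` exposes generator methods for these event kinds. A small sketch, using the calls that appear elsewhere in this change, is shown below (`tick`, the payload, and `actions` are placeholders):
+
+```python
+# Illustrative sketch only; payload contents are placeholders.
+decision_event = event_buffer.gen_decision_event(tick, my_decision_payload)
+
+# Wrap the agents' actions into an action event and chain it so that it runs
+# right after the decision event (sub-events of a CascadeEvent run immediately after it).
+action_event = event_buffer.gen_action_event(tick, actions)
+decision_event.add_immediate_event(action_event, is_head=True)
+```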
+
+## Business Engine
+
+Before diving into a concrete BE, we need to understand `AbsBusinessEngine`, which is an abstract class and the base class for all scenarios. When developing a new scenario, the developer should implement the new BE by inheriting from this base class.
+
+For a newly developed scenario, all interfaces required by `AbsBusinessEngine` should be implemented; the scenario-specific logic can then be implemented in any way you like. To make a BE more efficient, MARO provides two toolkits:
+
+* `EventBuffer`: Besides connecting `Env` and the BE, this component can also manage the BE's event logic. Compared with processing all events in a manual loop, letting the `EventBuffer` manage them is much more efficient: we only need to register what should happen at a specific time, and the event is processed when that time arrives, without repeatedly scanning for due events. (In the code, this particular time is the `tick`.)
+* `Frame`: `Frame` is a low-level data structure implemented in Cython. It is much faster for reading and writing structured data.
+
+Although the event logic of a BE is organized by the `EventBuffer`, we should understand what an event handler is before using it.
+
+#### Event Handler
+
+An event handler is a callback registered in the `EventBuffer`. Its role is to bind events to handler functions: the `EventBuffer` selects the handler according to the event's type (the types are defined by the user). The mapping looks like this:
+
+```python
+{
+    event_type_A: Function_A,
+    event_type_B: Function_B,
+}
+```
+
+This is a dictionary-like structure mapping event types to handler functions. When an event of type A arrives, the `EventBuffer` automatically dispatches it to Function_A to handle the event. With the support of the `EventBuffer`, a BE only needs to do the following (a sketch follows the list):
+
+* Define event types.
+* Define processing logic (function) for each type of event (including action event).
+* Register the mapping of event type and processing logic (function) in the event buffer's event handler.
+* Put the initial events into the event buffer.
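+
+A minimal sketch of these steps in a hypothetical business engine (the engine's own names are illustrative; the `EventBuffer` calls follow the usage seen elsewhere in MARO, and imports are omitted):
+
+```python
+class MyBusinessEngine(AbsBusinessEngine):
+    # Illustrative skeleton only.
+    def _register_events(self):
+        # Map each user-defined event type to its processing function.
+        self._event_buffer.register_event_handler(MyEvents.ORDER, self._on_order)
+        # Action events sent back from Env are handled here.
+        self._event_buffer.register_event_handler(MaroEvents.TAKE_ACTION, self._on_action_received)
+
+    def step(self, tick: int):
+        # Put the initial event(s) of this tick into the event buffer.
+        order_event = self._event_buffer.gen_atom_event(tick, MyEvents.ORDER, my_payload)
+        self._event_buffer.insert_event(order_event)
+```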
+
+## Event Loop
+
+After understanding the `EventBuffer` and `AbsBusinessEngine`, let's talk about the life cycle of events in MARO.
+
+As mentioned above, `Env` first passes the decision event to the outside; the action is then sent back from the outside and wrapped as an event inside `Env`. As the system `tick` advances, the event buffer controls the processing order: at each tick it processes the atom events due at that tick and returns any decision events.
+
+To store these events, we built the `EventLinkedList` data structure. It currently defines the following methods:
+
+* `append_tail` (`append`): Appends a new event to the tail.
+* `append_head`: Inserts an event at the head, so it will be the first one popped.
+* `_extract_sub_events`: Extracts a cascade event's immediate sub-events to the head.
+* `clear_finished_and_get_front`: Removes finished events and returns the first unfinished one.
+
+
+
+For the overall program, each event has a start state and an end state. As shown in the figure, when a system tick starts, the handlers automatically process the `AtomEvent`s due at the current tick and check whether any of them is a `CascadeEvent`. When processing completes, the event is marked finished at that tick.
diff --git a/maro/simulator/scenarios/cim/business_engine.py b/maro/simulator/scenarios/cim/business_engine.py
index d490cccfa..6a26dbe99 100644
--- a/maro/simulator/scenarios/cim/business_engine.py
+++ b/maro/simulator/scenarios/cim/business_engine.py
@@ -714,38 +714,38 @@ def _on_action_received(self, event: CascadeEvent):
actions = event.payload
assert isinstance(actions, list)
- if actions:
- for action in actions:
- vessel_idx = action.vessel_idx
- port_idx = action.port_idx
- move_num = action.quantity
- vessel = self._vessels[vessel_idx]
- port = self._ports[port_idx]
- port_empty = port.empty
- vessel_empty = vessel.empty
-
- assert isinstance(action, Action)
- action_type = action.action_type
-
- if action_type == ActionType.DISCHARGE:
- assert move_num <= vessel_empty
-
- port.empty = port_empty + move_num
- vessel.empty = vessel_empty - move_num
- else:
- assert move_num <= min(port_empty, vessel.remaining_space)
-
- port.empty = port_empty - move_num
- vessel.empty = vessel_empty + move_num
-
- # Align the event type to make the output readable.
- event.event_type = Events.DISCHARGE_EMPTY if action_type == ActionType.DISCHARGE else Events.LOAD_EMPTY
-
- # Update transfer cost for port and metrics.
- self._total_operate_num += move_num
- port.transfer_cost += move_num
-
- self._vessel_plans[vessel_idx, port_idx] += self._data_cntr.vessel_period[vessel_idx]
+ for action in actions:
+ assert isinstance(action, Action)
+
+ vessel_idx = action.vessel_idx
+ port_idx = action.port_idx
+ move_num = action.quantity
+ vessel = self._vessels[vessel_idx]
+ port = self._ports[port_idx]
+ port_empty = port.empty
+ vessel_empty = vessel.empty
+
+ action_type = action.action_type
+
+ if action_type == ActionType.DISCHARGE:
+ assert move_num <= vessel_empty
+
+ port.empty = port_empty + move_num
+ vessel.empty = vessel_empty - move_num
+ else:
+ assert move_num <= min(port_empty, vessel.remaining_space)
+
+ port.empty = port_empty - move_num
+ vessel.empty = vessel_empty + move_num
+
+ # Align the event type to make the output readable.
+ event.event_type = Events.DISCHARGE_EMPTY if action_type == ActionType.DISCHARGE else Events.LOAD_EMPTY
+
+ # Update transfer cost for port and metrics.
+ self._total_operate_num += move_num
+ port.transfer_cost += move_num
+
+ self._vessel_plans[vessel_idx, port_idx] += self._data_cntr.vessel_period[vessel_idx]
def _stream_base_info(self):
if streamit:
diff --git a/maro/simulator/scenarios/cim/common.py b/maro/simulator/scenarios/cim/common.py
index 0d44d85a1..cd6e36a8f 100644
--- a/maro/simulator/scenarios/cim/common.py
+++ b/maro/simulator/scenarios/cim/common.py
@@ -5,6 +5,7 @@
from enum import Enum, IntEnum
from maro.backends.frame import SnapshotList
+from maro.common import BaseAction, BaseDecisionEvent
class VesselState(IntEnum):
@@ -21,7 +22,7 @@ class ActionType(Enum):
DISCHARGE = "discharge"
-class Action:
+class Action(BaseAction):
"""Action object that used to pass action from agent to business engine.
Args:
@@ -68,7 +69,7 @@ def __repr__(self):
return "%s {load: %r, discharge: %r}" % (self.__class__.__name__, self.load, self.discharge)
-class DecisionEvent:
+class DecisionEvent(BaseDecisionEvent):
"""Decision event for agent.
Args:
diff --git a/maro/simulator/scenarios/citi_bike/business_engine.py b/maro/simulator/scenarios/citi_bike/business_engine.py
index f0d22bb78..100d0c1b7 100644
--- a/maro/simulator/scenarios/citi_bike/business_engine.py
+++ b/maro/simulator/scenarios/citi_bike/business_engine.py
@@ -15,7 +15,7 @@
from maro.cli.data_pipeline.citi_bike import CitiBikeProcess
from maro.cli.data_pipeline.utils import chagne_file_path
from maro.data_lib import BinaryReader
-from maro.event_buffer import AtomEvent, EventBuffer, MaroEvents
+from maro.event_buffer import AtomEvent, CascadeEvent, EventBuffer, MaroEvents
from maro.simulator.scenarios import AbsBusinessEngine
from maro.simulator.scenarios.helpers import DocableDict
from maro.simulator.scenarios.matrix_accessor import MatrixAttributeAccessor
@@ -23,7 +23,7 @@
from maro.utils.logger import CliLogger
from .adj_loader import load_adj_from_csv
-from .common import BikeReturnPayload, BikeTransferPayload, DecisionEvent
+from .common import Action, BikeReturnPayload, BikeTransferPayload, DecisionEvent
from .decision_strategy import BikeDecisionStrategy
from .events import CitiBikeEvents
from .frame_builder import build_frame
@@ -33,7 +33,6 @@
logger = CliLogger(name=__name__)
-
metrics_desc = """
Citi bike metrics used to provide statistics information at current point (may be in the middle of a tick).
It contains following keys:
@@ -519,14 +518,15 @@ def _on_bike_deliver(self, evt: AtomEvent):
station.bikes = station_bikes + max_accept_number
- def _on_action_received(self, evt: AtomEvent):
+ def _on_action_received(self, evt: CascadeEvent):
"""Callback when we get an action from agent."""
- action = None
+ actions = evt.payload
- if evt is None or evt.payload is None:
- return
+ assert isinstance(actions, list)
+
+ for action in actions:
+ assert isinstance(action, Action)
- for action in evt.payload:
from_station_idx: int = action.from_station_idx
to_station_idx: int = action.to_station_idx
diff --git a/maro/simulator/scenarios/citi_bike/common.py b/maro/simulator/scenarios/citi_bike/common.py
index 8ed0d75af..d7a9b46ea 100644
--- a/maro/simulator/scenarios/citi_bike/common.py
+++ b/maro/simulator/scenarios/citi_bike/common.py
@@ -3,6 +3,8 @@
from enum import Enum
+from maro.common import BaseAction, BaseDecisionEvent
+
class BikeTransferPayload:
"""Payload for bike transfer event.
@@ -63,7 +65,7 @@ class DecisionType(Enum):
Demand = "demand"
-class DecisionEvent:
+class DecisionEvent(BaseDecisionEvent):
"""Citi bike scenario decision event that contains station information for agent to choose action.
Args:
@@ -127,7 +129,7 @@ def __repr__(self):
)
-class Action:
+class Action(BaseAction):
"""Citi bike scenario action object, that used to pass action from agent to business engine.
Args:
diff --git a/maro/simulator/scenarios/citi_bike/station.py b/maro/simulator/scenarios/citi_bike/station.py
index 9515470d3..02709a47a 100644
--- a/maro/simulator/scenarios/citi_bike/station.py
+++ b/maro/simulator/scenarios/citi_bike/station.py
@@ -25,7 +25,7 @@ class Station(NodeBase):
# avg temp
temperature = NodeAttribute("i2")
- # 0: sunny, 1: rainy, 2: snowy, 3: sleet
+ # 0: sunny, 1: rainy, 2: snowy, 3: sleet
weather = NodeAttribute("i2")
# 0: holiday, 1: not holiday
diff --git a/maro/simulator/scenarios/vm_scheduling/__init__.py b/maro/simulator/scenarios/vm_scheduling/__init__.py
index cc9cf253d..316a7925a 100644
--- a/maro/simulator/scenarios/vm_scheduling/__init__.py
+++ b/maro/simulator/scenarios/vm_scheduling/__init__.py
@@ -2,7 +2,7 @@
# Licensed under the MIT license.
from .business_engine import VmSchedulingBusinessEngine
-from .common import AllocateAction, DecisionPayload, Latency, PostponeAction, VmRequestPayload
+from .common import AllocateAction, DecisionEvent, Latency, PostponeAction, VmRequestPayload
from .cpu_reader import CpuReader
from .enums import Events, PmState, PostponeType, VmCategory
from .physical_machine import PhysicalMachine
@@ -12,7 +12,7 @@
"VmSchedulingBusinessEngine",
"AllocateAction",
"PostponeAction",
- "DecisionPayload",
+ "DecisionEvent",
"Latency",
"VmRequestPayload",
"CpuReader",
diff --git a/maro/simulator/scenarios/vm_scheduling/business_engine.py b/maro/simulator/scenarios/vm_scheduling/business_engine.py
index 08f9b9445..30d80348b 100644
--- a/maro/simulator/scenarios/vm_scheduling/business_engine.py
+++ b/maro/simulator/scenarios/vm_scheduling/business_engine.py
@@ -17,7 +17,7 @@
from maro.utils.logger import CliLogger
from maro.utils.utils import convert_dottable
-from .common import AllocateAction, DecisionPayload, Latency, PostponeAction, VmRequestPayload
+from .common import Action, AllocateAction, DecisionEvent, Latency, PostponeAction, VmRequestPayload
from .cpu_reader import CpuReader
from .enums import Events, PmState, PostponeType, VmCategory
from .frame_builder import build_frame
@@ -528,7 +528,7 @@ def get_event_payload_detail(self) -> dict:
"""dict: Event payload details of current scenario."""
return {
Events.REQUEST.name: VmRequestPayload.summary_key,
- MaroEvents.PENDING_DECISION.name: DecisionPayload.summary_key,
+ MaroEvents.PENDING_DECISION.name: DecisionEvent.summary_key,
}
def get_agent_idx_list(self) -> List[int]:
@@ -820,7 +820,7 @@ def _on_vm_required(self, vm_request_event: CascadeEvent):
if len(valid_pm_list) > 0:
# Generate pending decision.
- decision_payload = DecisionPayload(
+ decision_payload = DecisionEvent(
frame_index=self.frame_index(tick=self._tick),
valid_pms=valid_pm_list,
vm_id=vm_info.id,
@@ -846,20 +846,24 @@ def _on_vm_required(self, vm_request_event: CascadeEvent):
def _on_action_received(self, event: CascadeEvent):
"""Callback wen we get an action from agent."""
- action = None
- if event is None or event.payload is None:
+ actions = event.payload
+ assert isinstance(actions, list)
+
+ if len(actions) == 0:
self._pending_vm_request_payload.pop(self._pending_action_vm_id)
return
- cur_tick: int = event.tick
+ for action in actions:
+ assert isinstance(action, Action)
+
+ cur_tick: int = event.tick
- for action in event.payload:
vm_id: int = action.vm_id
if vm_id not in self._pending_vm_request_payload:
raise Exception(f"The VM id: '{vm_id}' sent by agent is invalid.")
- if type(action) == AllocateAction:
+ if isinstance(action, AllocateAction):
pm_id = action.pm_id
vm: VirtualMachine = self._pending_vm_request_payload[vm_id].vm_info
lifetime = vm.lifetime
@@ -899,7 +903,7 @@ def _on_action_received(self, event: CascadeEvent):
)
self._successful_allocation += 1
- elif type(action) == PostponeAction:
+ elif isinstance(action, PostponeAction):
postpone_step = action.postpone_step
remaining_buffer_time = self._pending_vm_request_payload[vm_id].remaining_buffer_time
# Either postpone the requirement event or failed.
diff --git a/maro/simulator/scenarios/vm_scheduling/common.py b/maro/simulator/scenarios/vm_scheduling/common.py
index 03aa4278a..8ed5f6aa7 100644
--- a/maro/simulator/scenarios/vm_scheduling/common.py
+++ b/maro/simulator/scenarios/vm_scheduling/common.py
@@ -3,10 +3,12 @@
from typing import List
+from maro.common import BaseAction, BaseDecisionEvent
+
from .virtual_machine import VirtualMachine
-class Action:
+class Action(BaseAction):
"""VM Scheduling scenario action object, which was used to pass action from agent to business engine.
Args:
@@ -74,7 +76,7 @@ def __repr__(self):
)
-class DecisionPayload:
+class DecisionEvent(BaseDecisionEvent):
"""Decision event in VM Scheduling scenario that contains information for agent to choose action.
Args:
diff --git a/maro/simulator/utils/common.py b/maro/simulator/utils/common.py
index 48105a100..3ddbf7248 100644
--- a/maro/simulator/utils/common.py
+++ b/maro/simulator/utils/common.py
@@ -27,6 +27,11 @@ def get_available_envs():
return envs
+def scenario_not_empty(scenario_path: str) -> bool:
+ _, _, files = next(os.walk(scenario_path))
+ return "business_engine.py" in files
+
+
def get_scenarios() -> List[str]:
"""Get built-in scenario name list.
@@ -35,7 +40,13 @@ def get_scenarios() -> List[str]:
"""
try:
_, scenarios, _ = next(os.walk(scenarios_root_folder))
- scenarios = sorted([s for s in scenarios if not s.startswith("__")])
+ scenarios = sorted(
+ [
+ s
+ for s in scenarios
+ if not s.startswith("__") and scenario_not_empty(os.path.join(scenarios_root_folder, s))
+ ],
+ )
except StopIteration:
return []
diff --git a/notebooks/requirements.nb.txt b/notebooks/requirements.nb.txt
index 518b1a5de..0d20c173f 100644
--- a/notebooks/requirements.nb.txt
+++ b/notebooks/requirements.nb.txt
@@ -1,4 +1,5 @@
-jupyter==1.0.0
+ipython-genutils>=0.2.0
+ipython>=7.16.3
jupyter-client
jupyter-console
jupyter-contrib-core
@@ -7,18 +8,10 @@ jupyter-core
jupyter-highlight-selected-word
jupyter-latex-envs
jupyter-nbextensions-configurator
+jupyter>=1.0.0
jupyterlab
jupyterlab-server
jupyterthemes
-isort==4.3.21
-autopep8==1.4.4
-isort==4.3.21
-pandas==0.25.3
-matplotlib==3.1.2
-seaborn==0.9.0
-ipython==7.16.3
-ipython-genutils==0.2.0
-shap==0.32.1
-seaborn==0.9.0
-numpy<1.20.0
-numba==0.46.0
+matplotlib>=3.1.2
+seaborn>=0.9.0
+shap>=0.32.1
diff --git a/requirements.dev.txt b/requirements.dev.txt
index 8c34db6eb..3bbf44e1e 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -1,69 +1,35 @@
-add-trailing-comma
-altair==4.1.0
-aria2p==0.9.1
-astroid==2.3.3
+altair>=4.1.0
+aria2p>=0.9.1
azure-identity
azure-mgmt-authorization
azure-mgmt-containerservice
azure-mgmt-resource
azure-mgmt-storage
azure-storage-file-share
-black==22.3.0
-certifi==2019.9.11
-cryptography==36.0.1
-cycler==0.10.0
-Cython==0.29.14
-deepdiff==5.7.0
+cryptography>=36.0.1
+deepdiff>=5.7.0
docker
-editorconfig-checker==2.4.0
-flake8==4.0.1
-flask-cors==3.0.10
-flask==1.1.2
-flask_cors==3.0.10
-flask_socketio==5.2.0
-flloat==0.3.0
-geopy==2.0.0
-guppy3==3.0.9
-holidays==0.10.3
-isort==4.3.21
-jinja2==2.11.3
-kiwisolver==1.1.0
-kubernetes==21.7.0
-lazy-object-proxy==1.4.3
-markupsafe==2.0.1
-matplotlib==3.5.2
-mccabe==0.6.1
-networkx==2.4
-networkx==2.4
-numpy<1.20.0
-palettable==3.3.0
-pandas==0.25.3
-pre-commit==2.19.0
-prompt_toolkit==2.0.10
-psutil==5.8.0
-ptvsd==4.3.2
-pulp==2.6.0
-pyaml==20.4.0
-PyJWT==2.4.0
-pyparsing==2.4.5
-python-dateutil==2.8.1
-PyYAML==5.4.1
-pyzmq==19.0.2
-recommonmark~=0.6.0
-redis==3.5.3
-requests==2.25.1
-scipy==1.7.0
-setuptools==58.0.4
-six==1.13.0
-sphinx==1.8.6
-sphinx_rtd_theme==1.0.0
-streamlit==0.69.1
-stringcase==1.2.0
-tabulate==0.8.5
-termgraph==0.5.3
-torch==1.6.0
-torchsummary==1.5.1
-tqdm==4.51.0
-urllib3==1.26.5
-wrapt==1.11.2
-zmq==0.0.0
+Flask>=1.1.2
+Flask_Cors>=3.0.10
+Flask_SocketIO>=5.2.0
+geopy>=2.0.0
+holidays>=0.10.3
+Jinja2>=2.11.3
+kubernetes>=21.7.0
+numpy>=1.19.5
+pandas>=0.25.3
+paramiko>=2.9.2
+prompt_toolkit>=2.0.10
+psutil>=5.8.0
+ptvsd>=4.3.2
+PyJWT>=2.4.0
+python_dateutil>=2.8.1
+PyYAML>=5.4.1
+pyzmq>=19.0.2
+requests>=2.25.1
+scipy>=1.7.0
+streamlit>=0.69.1
+stringcase>=1.2.0
+tabulate>=0.8.5
+tornado>=6.1
+tqdm>=4.51.0
diff --git a/setup.py b/setup.py
index 12e0c035d..213cd09b0 100644
--- a/setup.py
+++ b/setup.py
@@ -135,25 +135,20 @@
],
install_requires=[
# TODO: use a helper function to collect these
- "numpy<1.20.0",
- "scipy<=1.7.0",
- "torch<1.8.0",
"holidays>=0.10.3",
- "pyaml>=20.4.0",
+ "numpy>=1.19.5",
+ "pandas>=0.25.3",
+ "paramiko>=2.9.2",
+ "ptvsd>=4.3.2",
+ "python_dateutil>=2.8.1",
+ "PyYAML>=5.4.1",
+ "pyzmq>=19.0.2",
"redis>=3.5.3",
- "pyzmq<22.1.0",
- "requests<=2.26.0",
- "psutil<5.9.0",
- "deepdiff>=5.2.2",
- "azure-storage-blob<12.9.0",
- "azure-storage-common",
- "geopy>=2.0.0",
- "pandas<1.2",
- "PyYAML<5.5.0",
- "paramiko>=2.7.2",
- "kubernetes>=12.0.1",
- "prompt_toolkit<3.1.0",
- "stringcase>=1.2.0",
+ "requests>=2.25.1",
+ "scipy>=1.7.0",
+ "tabulate>=0.8.5",
+ "torch>=1.6.0, <1.8.0",
+ "tornado>=6.1",
]
+ specific_requires,
entry_points={
diff --git a/tests/requirements.test.txt b/tests/requirements.test.txt
index 221ee85cd..c7ac76f45 100644
--- a/tests/requirements.test.txt
+++ b/tests/requirements.test.txt
@@ -1,23 +1,17 @@
-matplotlib>=3.1.2
-geopy
-pandas<1.2
-numpy<1.20.0
-holidays>=0.10.3
-pyaml>=20.4.0
-redis>=3.5.3
-pyzmq<22.1.0
-influxdb
-requests<=2.26.0
-psutil<5.9.0
-deepdiff>=5.2.2
+altair>=4.1.0
azure-storage-blob<12.9.0
azure-storage-common
-torch<1.8.0
-pytest
-coverage
-termgraph
-paramiko>=2.7.2
-pytz==2019.3
-aria2p==0.9.1
-kubernetes>=12.0.1
-PyYAML<5.5.0
\ No newline at end of file
+coverage>=6.4.1
+deepdiff>=5.7.0
+geopy>=2.0.0
+holidays>=0.10.3
+kubernetes>=21.7.0
+numpy>=1.19.5
+pandas>=0.25.3
+paramiko>=2.9.2
+pytest>=7.1.2
+PyYAML>=5.4.1
+pyzmq>=19.0.2
+redis>=3.5.3
+streamlit>=0.69.1
+termgraph>=0.5.3
diff --git a/tests/test_env.py b/tests/test_env.py
index 3c610337c..d96950d3c 100644
--- a/tests/test_env.py
+++ b/tests/test_env.py
@@ -8,7 +8,7 @@
from maro.simulator.utils import get_available_envs, get_scenarios, get_topologies
from maro.simulator.utils.common import frame_index_to_ticks, tick_to_frame_index
-from .dummy.dummy_business_engine import DummyEngine
+from tests.dummy.dummy_business_engine import DummyEngine
from tests.utils import backends_to_test
@@ -376,24 +376,17 @@ def test_invalid_scenario(self):
with self.assertRaises(FileNotFoundError) as ctx:
Env("cim", "None", 100)
- def test_get_avaiable_envs(self):
- scenario_names = get_scenarios()
+ def test_get_available_envs(self):
+ scenario_names = sorted(get_scenarios())
# we have 3 built-in scenarios
- self.assertEqual(3, len(scenario_names))
-
- self.assertTrue("cim" in scenario_names)
- self.assertTrue("citi_bike" in scenario_names)
-
- cim_topoloies = get_topologies("cim")
- citi_bike_topologies = get_topologies("citi_bike")
- vm_topoloties = get_topologies("vm_scheduling")
+ self.assertListEqual(scenario_names, ["cim", "citi_bike", "vm_scheduling"])
env_list = get_available_envs()
self.assertEqual(
len(env_list),
- len(cim_topoloies) + len(citi_bike_topologies) + len(vm_topoloties) + len(get_topologies("supply_chain")),
+ sum(len(get_topologies(s)) for s in scenario_names),
)
def test_frame_index_to_ticks(self):
@@ -404,7 +397,7 @@ def test_frame_index_to_ticks(self):
self.assertListEqual([0, 1], ticks[0])
self.assertListEqual([8, 9], ticks[4])
- def test_get_avalible_frame_index_to_ticks_with_default_resolution(self):
+ def test_get_available_frame_index_to_ticks_with_default_resolution(self):
for backend_name in backends_to_test:
os.environ["DEFAULT_BACKEND_NAME"] = backend_name
@@ -425,7 +418,7 @@ def test_get_avalible_frame_index_to_ticks_with_default_resolution(self):
self.assertListEqual([t for t in t2f_mapping.keys()], [t for t in range(max_tick)])
self.assertListEqual([f for f in t2f_mapping.values()], [f for f in range(max_tick)])
- def test_get_avalible_frame_index_to_ticks_with_resolution2(self):
+ def test_get_available_frame_index_to_ticks_with_resolution2(self):
for backend_name in backends_to_test:
os.environ["DEFAULT_BACKEND_NAME"] = backend_name
diff --git a/tests/test_event_buffer.py b/tests/test_event_buffer.py
index 88574eb99..b4e70cdc2 100644
--- a/tests/test_event_buffer.py
+++ b/tests/test_event_buffer.py
@@ -6,6 +6,7 @@
import unittest
from typing import Optional
+from maro.common import BaseDecisionEvent
from maro.event_buffer import ActualEvent, AtomEvent, CascadeEvent, DummyEvent, EventBuffer, EventState, MaroEvents
from maro.event_buffer.event_linked_list import EventLinkedList
from maro.event_buffer.event_pool import EventPool
@@ -251,9 +252,9 @@ def cb2(evt):
self.eb.execute(1)
def test_sub_events_with_decision(self):
- evt1 = self.eb.gen_decision_event(1, (1, 1, 1))
- sub1 = self.eb.gen_decision_event(1, (2, 2, 2))
- sub2 = self.eb.gen_decision_event(1, (3, 3, 3))
+ evt1 = self.eb.gen_decision_event(1, BaseDecisionEvent())
+ sub1 = self.eb.gen_decision_event(1, BaseDecisionEvent())
+ sub2 = self.eb.gen_decision_event(1, BaseDecisionEvent())
evt1.add_immediate_event(sub1, is_head=True)
evt1.add_immediate_event(sub2)