diff --git a/PyFlyt/gym_envs/__init__.py b/PyFlyt/gym_envs/__init__.py
index b1d648a9..ace30c75 100644
--- a/PyFlyt/gym_envs/__init__.py
+++ b/PyFlyt/gym_envs/__init__.py
@@ -6,11 +6,11 @@
 
 # QuadX Envs
 register(
-    id="PyFlyt/QuadX-Hover-v3",
+    id="PyFlyt/QuadX-Hover-v4",
     entry_point="PyFlyt.gym_envs.quadx_envs.quadx_hover_env:QuadXHoverEnv",
 )
 register(
-    id="PyFlyt/QuadX-Waypoints-v3",
+    id="PyFlyt/QuadX-Waypoints-v4",
     entry_point="PyFlyt.gym_envs.quadx_envs.quadx_waypoints_env:QuadXWaypointsEnv",
 )
 register(
@@ -18,15 +18,15 @@
     entry_point="PyFlyt.gym_envs.quadx_envs.quadx_gates_env:QuadXGatesEnv",
 )
 register(
-    id="PyFlyt/QuadX-Pole-Balance-v3",
+    id="PyFlyt/QuadX-Pole-Balance-v4",
     entry_point="PyFlyt.gym_envs.quadx_envs.quadx_pole_balance_env:QuadXPoleBalanceEnv",
 )
 register(
-    id="PyFlyt/QuadX-Pole-Waypoints-v3",
+    id="PyFlyt/QuadX-Pole-Waypoints-v4",
     entry_point="PyFlyt.gym_envs.quadx_envs.quadx_pole_waypoints_env:QuadXPoleWaypointsEnv",
 )
 register(
-    id="PyFlyt/QuadX-Ball-In-Cup-v3",
+    id="PyFlyt/QuadX-Ball-In-Cup-v4",
     entry_point="PyFlyt.gym_envs.quadx_envs.quadx_ball_in_cup_env:QuadXBallInCupEnv",
 )
 
diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py
index 523dbc03..50e268a3 100644
--- a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py
+++ b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py
@@ -255,7 +255,6 @@ def compute_state(self) -> None:
     def compute_term_trunc_reward(self) -> None:
         """Computes the termination, truncation, and reward of the current timestep."""
         super().compute_base_term_trunc_reward()
-
         # compute some parameters of the ball
         # lin_pos: [3,], height: [1,], abs_dist: [1,]
         ball_rel_lin_pos = self.ball_lin_pos - self.env.state(0)[-1]
@@ -264,6 +263,15 @@ def compute_term_trunc_reward(self) -> None:
 
         # bonus reward if we are not sparse
         if not self.sparse_reward:
+            # penalize high yaw rate to discourage spinning while training
+            yaw_rate = abs(
+                self.env.state(0)[0][2]
+            )  # state(0)[0] is angular velocity; index 2 is the yaw rate
+            yaw_rate_penalty = 0.01 * yaw_rate**2  # quadratic penalty on yaw rate
+            self.reward -= (
+                yaw_rate_penalty  # the 0.01 coefficient is tunable
+            )
+
             # reward for staying alive
             self.reward += 0.4
 
diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py
index ae535386..9b80f25f 100644
--- a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py
+++ b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py
@@ -117,12 +117,19 @@ def compute_state(self) -> None:
     def compute_term_trunc_reward(self) -> None:
         """Computes the termination, truncation, and reward of the current timestep."""
         super().compute_base_term_trunc_reward()
-
         if not self.sparse_reward:
             # distance from 0, 0, 1 hover point
             linear_distance = np.linalg.norm(
                 self.env.state(0)[-1] - np.array([0.0, 0.0, 1.0])
             )
+            # penalize high yaw rate to discourage spinning while training
+            yaw_rate = abs(
+                self.env.state(0)[0][2]
+            )  # state(0)[0] is angular velocity; index 2 is the yaw rate
+            yaw_rate_penalty = 0.01 * yaw_rate**2  # quadratic penalty on yaw rate
+            self.reward -= (
+                yaw_rate_penalty  # the 0.01 coefficient is tunable
+            )
             # how far are we from 0 roll pitch
             angular_distance = np.linalg.norm(self.env.state(0)[1][:2])
 
diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py
index 6c2ec3f7..e3a885dd 100644
--- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py
+++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py
@@ -184,7 +184,15 @@ def compute_term_trunc_reward(self) -> None:
         # how far are we from 0 roll pitch
         angular_distance = np.linalg.norm(self.env.state(0)[1][:2])
-
         self.reward -= linear_distance + angular_distance
         self.reward -= self.pole.leaningness
         self.reward += 1.0
+
+        # penalize high yaw rate to discourage spinning while training
+        yaw_rate = abs(
+            self.env.state(0)[0][2]
+        )  # state(0)[0] is angular velocity; index 2 is the yaw rate
+        yaw_rate_penalty = 0.01 * yaw_rate**2  # quadratic penalty on yaw rate
+        self.reward -= (
+            yaw_rate_penalty  # the 0.01 coefficient is tunable
+        )
 
diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py
index 953ed24a..523f3d2e 100644
--- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py
+++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py
@@ -224,12 +224,19 @@ def compute_state(self) -> None:
     def compute_term_trunc_reward(self) -> None:
         """Computes the termination, truncation, and reward of the current timestep."""
         super().compute_base_term_trunc_reward()
-
         # bonus reward if we are not sparse
         if not self.sparse_reward:
             self.reward += max(15.0 * self.waypoints.progress_to_next_target, 0.0)
             self.reward += 0.5 / self.waypoints.distance_to_next_target
             self.reward += 0.5 - self.pole.leaningness
+            # penalize high yaw rate to discourage spinning while training
+            yaw_rate = abs(
+                self.env.state(0)[0][2]
+            )  # state(0)[0] is angular velocity; index 2 is the yaw rate
+            yaw_rate_penalty = 0.01 * yaw_rate**2  # quadratic penalty on yaw rate
+            self.reward -= (
+                yaw_rate_penalty  # the 0.01 coefficient is tunable
+            )
 
         # target reached
         if self.waypoints.target_reached:
diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py
index 3e633cdc..8b4c7a39 100644
--- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py
+++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py
@@ -182,6 +182,14 @@ def compute_term_trunc_reward(self) -> None:
         if not self.sparse_reward:
             self.reward += max(3.0 * self.waypoints.progress_to_next_target, 0.0)
             self.reward += 0.1 / self.waypoints.distance_to_next_target
+            # penalize high yaw rate to discourage spinning while training
+            yaw_rate = abs(
+                self.env.state(0)[0][2]
+            )  # state(0)[0] is angular velocity; index 2 is the yaw rate
+            yaw_rate_penalty = 0.01 * yaw_rate**2  # quadratic penalty on yaw rate
+            self.reward -= (
+                yaw_rate_penalty  # the 0.01 coefficient is tunable
+            )
 
         # target reached
         if self.waypoints.target_reached:
diff --git a/tests/test_gym_envs.py b/tests/test_gym_envs.py
index 0879d0b5..92fa83cc 100644
--- a/tests/test_gym_envs.py
+++ b/tests/test_gym_envs.py
@@ -16,8 +16,8 @@
 _WAYPOINT_ENV_CONFIGS = []
 for env_name, angle_representation, sparse_reward in itertools.product(
     [
-        "PyFlyt/QuadX-Waypoints-v3",
-        "PyFlyt/QuadX-Pole-Waypoints-v3",
+        "PyFlyt/QuadX-Waypoints-v4",
+        "PyFlyt/QuadX-Pole-Waypoints-v4",
         "PyFlyt/Fixedwing-Waypoints-v3",
     ],
     ["euler", "quaternion"],
@@ -37,9 +37,9 @@
 _NORMAL_ENV_CONFIGS = []
 for env_name, angle_representation, sparse_reward in itertools.product(
     [
-        "PyFlyt/QuadX-Hover-v3",
-        "PyFlyt/QuadX-Pole-Balance-v3",
-        "PyFlyt/QuadX-Ball-In-Cup-v3",
+        "PyFlyt/QuadX-Hover-v4",
+        "PyFlyt/QuadX-Pole-Balance-v4",
+        "PyFlyt/QuadX-Ball-In-Cup-v4",
         "PyFlyt/Rocket-Landing-v4",
     ],
     ["euler", "quaternion"],
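
A minimal usage sketch (not part of the patch), assuming gymnasium and this revision of PyFlyt are installed; sparse_reward is an existing constructor flag on these envs. With sparse_reward=False, the per-step reward now includes the -0.01 * yaw_rate**2 penalty added above:

import gymnasium as gym

import PyFlyt.gym_envs  # noqa: F401 -- importing this module registers the PyFlyt/* env ids

# dense-reward variant: the quadratic yaw-rate penalty applies each step
env = gym.make("PyFlyt/QuadX-Hover-v4", sparse_reward=False)
obs, info = env.reset(seed=0)
for _ in range(100):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:
        break
env.close()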