From cf326f8396dfd6173fca0eac565f06b4bac3b287 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:04:31 +0530 Subject: [PATCH 01/21] Update quadx_ball_in_cup_env.py Negative Reward For High Yaw rate, To prevent high yaw while training --- PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py index 523dbc03..ec33653a 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py @@ -256,6 +256,11 @@ def compute_term_trunc_reward(self) -> None: """Computes the termination, truncation, and reward of the current timestep.""" super().compute_base_term_trunc_reward() + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # compute some parameters of the ball # lin_pos: [3,], height: [1,], abs_dist: [1,] ball_rel_lin_pos = self.ball_lin_pos - self.env.state(0)[-1] From d9c82ae57bf0214f75f9a3c13dcd02fda50b9ac9 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:05:40 +0530 Subject: [PATCH 02/21] Update quadx_pole_balance_env.py Negative Reward For High Yaw rate, To prevent high yaw while training --- PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py index 6c2ec3f7..d57460d1 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py @@ -184,7 +184,12 @@ def compute_term_trunc_reward(self) -> None: # how far are we from 0 roll pitch angular_distance = np.linalg.norm(self.env.state(0)[1][:2]) - + + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + self.reward -= linear_distance + angular_distance self.reward -= self.pole.leaningness self.reward += 1.0 From 2fa51977bba3898c51ecb8627205b6510f51b85e Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:06:17 +0530 Subject: [PATCH 03/21] Update quadx_hover_env.py Negative Reward For High Yaw rate, To prevent high yaw while training --- PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py index ae535386..b3e4fa15 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py @@ -118,6 +118,11 @@ def compute_term_trunc_reward(self) -> None: """Computes the termination, truncation, and reward of the current timestep.""" super().compute_base_term_trunc_reward() + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = 
abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + if not self.sparse_reward: # distance from 0, 0, 1 hover point linear_distance = np.linalg.norm( From a7eca1ecab1af3d8686940d25c8e5259f7a32b89 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:06:48 +0530 Subject: [PATCH 04/21] Update quadx_pole_waypoints_env.py Negative Reward For High Yaw rate, To prevent high yaw while training --- PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py index 953ed24a..09e5a0b4 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py @@ -225,6 +225,12 @@ def compute_term_trunc_reward(self) -> None: """Computes the termination, truncation, and reward of the current timestep.""" super().compute_base_term_trunc_reward() + + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # bonus reward if we are not sparse if not self.sparse_reward: self.reward += max(15.0 * self.waypoints.progress_to_next_target, 0.0) From 17fd3c42a17cde94aee24ab0211cc57d641fd08f Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Tue, 25 Feb 2025 00:07:20 +0530 Subject: [PATCH 05/21] Update quadx_waypoints_env.py Negative Reward For High Yaw rate, To prevent high yaw while training --- PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py index 3e633cdc..f34740f9 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py @@ -183,6 +183,12 @@ def compute_term_trunc_reward(self) -> None: self.reward += max(3.0 * self.waypoints.progress_to_next_target, 0.0) self.reward += 0.1 / self.waypoints.distance_to_next_target + + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # target reached if self.waypoints.target_reached: self.reward = 100.0 From 7c62f8b9fd603ed5bd2fdaa62b6458539e9da26c Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 14:03:59 +0530 Subject: [PATCH 06/21] Update __init__.py --- PyFlyt/gym_envs/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/PyFlyt/gym_envs/__init__.py b/PyFlyt/gym_envs/__init__.py index b1d648a9..ace30c75 100644 --- a/PyFlyt/gym_envs/__init__.py +++ b/PyFlyt/gym_envs/__init__.py @@ -6,11 +6,11 @@ # QuadX Envs register( - id="PyFlyt/QuadX-Hover-v3", + id="PyFlyt/QuadX-Hover-v4", 
entry_point="PyFlyt.gym_envs.quadx_envs.quadx_hover_env:QuadXHoverEnv", ) register( - id="PyFlyt/QuadX-Waypoints-v3", + id="PyFlyt/QuadX-Waypoints-v4", entry_point="PyFlyt.gym_envs.quadx_envs.quadx_waypoints_env:QuadXWaypointsEnv", ) register( @@ -18,15 +18,15 @@ entry_point="PyFlyt.gym_envs.quadx_envs.quadx_gates_env:QuadXGatesEnv", ) register( - id="PyFlyt/QuadX-Pole-Balance-v3", + id="PyFlyt/QuadX-Pole-Balance-v4", entry_point="PyFlyt.gym_envs.quadx_envs.quadx_pole_balance_env:QuadXPoleBalanceEnv", ) register( - id="PyFlyt/QuadX-Pole-Waypoints-v3", + id="PyFlyt/QuadX-Pole-Waypoints-v4", entry_point="PyFlyt.gym_envs.quadx_envs.quadx_pole_waypoints_env:QuadXPoleWaypointsEnv", ) register( - id="PyFlyt/QuadX-Ball-In-Cup-v3", + id="PyFlyt/QuadX-Ball-In-Cup-v4", entry_point="PyFlyt.gym_envs.quadx_envs.quadx_ball_in_cup_env:QuadXBallInCupEnv", ) From 8a3a1be0786b0b0cb685571786ba951156a35ecb Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 14:09:28 +0530 Subject: [PATCH 07/21] Update quadx_pole_balance_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py index d57460d1..4dfd5eb5 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py @@ -184,12 +184,12 @@ def compute_term_trunc_reward(self) -> None: # how far are we from 0 roll pitch angular_distance = np.linalg.norm(self.env.state(0)[1][:2]) - - #Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - self.reward -= linear_distance + angular_distance self.reward -= self.pole.leaningness self.reward += 1.0 + + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + From f070f1365ef18638d08fe80abf269b2abbc4290f Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:15:09 +0530 Subject: [PATCH 08/21] Update quadx_waypoints_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py index f34740f9..0972e915 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py @@ -183,12 +183,12 @@ def compute_term_trunc_reward(self) -> None: self.reward += max(3.0 * self.waypoints.progress_to_next_target, 0.0) self.reward += 0.1 / self.waypoints.distance_to_next_target - - #Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + + #Negative Reward For High 
Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # target reached if self.waypoints.target_reached: self.reward = 100.0 From 9fc29e78948acc55a72c8984862948c2e413366f Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:15:28 +0530 Subject: [PATCH 09/21] Update quadx_pole_waypoints_env.py --- .../gym_envs/quadx_envs/quadx_pole_waypoints_env.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py index 09e5a0b4..fbe62045 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py @@ -226,17 +226,17 @@ def compute_term_trunc_reward(self) -> None: super().compute_base_term_trunc_reward() - #Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + # bonus reward if we are not sparse if not self.sparse_reward: self.reward += max(15.0 * self.waypoints.progress_to_next_target, 0.0) self.reward += 0.5 / self.waypoints.distance_to_next_target self.reward += 0.5 - self.pole.leaningness - + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # target reached if self.waypoints.target_reached: self.reward = 300.0 From 587c36cfb4791e6ef9ec4bdc430e890984d989fc Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:15:41 +0530 Subject: [PATCH 10/21] Update quadx_pole_balance_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py index 4dfd5eb5..09dbfbb8 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py @@ -188,8 +188,8 @@ def compute_term_trunc_reward(self) -> None: self.reward -= self.pole.leaningness self.reward += 1.0 - #Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + From b8e7081ecf9c14209d935dd426b2cf154f0789f0 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 
<145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:16:01 +0530 Subject: [PATCH 11/21] Update quadx_hover_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py index b3e4fa15..6faaa3c5 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py @@ -117,18 +117,16 @@ def compute_state(self) -> None: def compute_term_trunc_reward(self) -> None: """Computes the termination, truncation, and reward of the current timestep.""" super().compute_base_term_trunc_reward() - - #Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - if not self.sparse_reward: # distance from 0, 0, 1 hover point linear_distance = np.linalg.norm( self.env.state(0)[-1] - np.array([0.0, 0.0, 1.0]) ) - + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # how far are we from 0 roll pitch angular_distance = np.linalg.norm(self.env.state(0)[1][:2]) From 63b2f56d51e506257b887a25f5430c38f6294bb4 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:16:59 +0530 Subject: [PATCH 12/21] Update quadx_ball_in_cup_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py index ec33653a..82ddc129 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py @@ -255,12 +255,6 @@ def compute_state(self) -> None: def compute_term_trunc_reward(self) -> None: """Computes the termination, truncation, and reward of the current timestep.""" super().compute_base_term_trunc_reward() - - #Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - # compute some parameters of the ball # lin_pos: [3,], height: [1,], abs_dist: [1,] ball_rel_lin_pos = self.ball_lin_pos - self.env.state(0)[-1] @@ -269,6 +263,11 @@ def compute_term_trunc_reward(self) -> None: # bonus reward if we are not sparse if not self.sparse_reward: + #Negative Reward For High Yaw rate, To prevent high yaw while training + yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + # reward for staying alive self.reward += 0.4 From 06317208ad98c6984c3c08846b12b27a01d04608 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: 
Fri, 28 Feb 2025 16:18:26 +0530 Subject: [PATCH 13/21] Update test_gym_envs.py --- tests/test_gym_envs.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_gym_envs.py b/tests/test_gym_envs.py index 0879d0b5..92fa83cc 100644 --- a/tests/test_gym_envs.py +++ b/tests/test_gym_envs.py @@ -16,8 +16,8 @@ _WAYPOINT_ENV_CONFIGS = [] for env_name, angle_representation, sparse_reward in itertools.product( [ - "PyFlyt/QuadX-Waypoints-v3", - "PyFlyt/QuadX-Pole-Waypoints-v3", + "PyFlyt/QuadX-Waypoints-v4", + "PyFlyt/QuadX-Pole-Waypoints-v4", "PyFlyt/Fixedwing-Waypoints-v3", ], ["euler", "quaternion"], @@ -37,9 +37,9 @@ _NORMAL_ENV_CONFIGS = [] for env_name, angle_representation, sparse_reward in itertools.product( [ - "PyFlyt/QuadX-Hover-v3", - "PyFlyt/QuadX-Pole-Balance-v3", - "PyFlyt/QuadX-Ball-In-Cup-v3", + "PyFlyt/QuadX-Hover-v4", + "PyFlyt/QuadX-Pole-Balance-v4", + "PyFlyt/QuadX-Ball-In-Cup-v4", "PyFlyt/Rocket-Landing-v4", ], ["euler", "quaternion"], From 30934153785c41a2cb4a84514177f575e32bc2ad Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 23:16:47 +0530 Subject: [PATCH 14/21] Update quadx_ball_in_cup_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py index 82ddc129..bc47855b 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py @@ -263,9 +263,9 @@ def compute_term_trunc_reward(self) -> None: # bonus reward if we are not sparse if not self.sparse_reward: - #Negative Reward For High Yaw rate, To prevent high yaw while training + # Negative Reward For High Yaw rate, To prevent high yaw while training yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed # reward for staying alive From 7024da695ebf39d4c9070b7a376cf02aad29baf4 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 23:17:08 +0530 Subject: [PATCH 15/21] Update quadx_hover_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py index 6faaa3c5..da387589 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py @@ -122,9 +122,9 @@ def compute_term_trunc_reward(self) -> None: linear_distance = np.linalg.norm( self.env.state(0)[-1] - np.array([0.0, 0.0, 1.0]) ) - #Negative Reward For High Yaw rate, To prevent high yaw while training + # Negative Reward For High Yaw rate, To prevent high yaw while training yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed # how far are we from 0 roll pitch From 008d7e47c079be5110e9865acf17d902998b5afc Mon Sep 17 00:00:00 2001 From: NishantChandna1403 
<145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 23:17:27 +0530 Subject: [PATCH 16/21] Update quadx_pole_balance_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py index 09dbfbb8..90a3c60b 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py @@ -188,8 +188,8 @@ def compute_term_trunc_reward(self) -> None: self.reward -= self.pole.leaningness self.reward += 1.0 - #Negative Reward For High Yaw rate, To prevent high yaw while training + # Negative Reward For High Yaw rate, To prevent high yaw while training yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed From 57b514763b4fb40677828f7274ab8f963b454762 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 23:17:45 +0530 Subject: [PATCH 17/21] Update quadx_pole_waypoints_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py index fbe62045..092a280a 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py @@ -232,9 +232,9 @@ def compute_term_trunc_reward(self) -> None: self.reward += max(15.0 * self.waypoints.progress_to_next_target, 0.0) self.reward += 0.5 / self.waypoints.distance_to_next_target self.reward += 0.5 - self.pole.leaningness - #Negative Reward For High Yaw rate, To prevent high yaw while training + # Negative Reward For High Yaw rate, To prevent high yaw while training yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed # target reached From 9ac57e785a7b2b5f32bf5738d1edb414e4721bfa Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Fri, 28 Feb 2025 23:18:03 +0530 Subject: [PATCH 18/21] Update quadx_waypoints_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py index 0972e915..0be60013 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py @@ -184,9 +184,9 @@ def compute_term_trunc_reward(self) -> None: self.reward += 0.1 / self.waypoints.distance_to_next_target - #Negative Reward For High Yaw rate, To prevent high yaw while training + # Negative Reward For High Yaw rate, To prevent high yaw while training yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component - yaw_rate_penalty = 0.01 * yaw_rate**2# Add penalty for high yaw rate + yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for 
high yaw rate self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed # target reached From fb0976aec08573e54851e02b37ba63e4cb032bb0 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:49:54 +0530 Subject: [PATCH 19/21] Update quadx_waypoints_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py index 0be60013..2bb18f3e 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py @@ -182,8 +182,6 @@ def compute_term_trunc_reward(self) -> None: if not self.sparse_reward: self.reward += max(3.0 * self.waypoints.progress_to_next_target, 0.0) self.reward += 0.1 / self.waypoints.distance_to_next_target - - # Negative Reward For High Yaw rate, To prevent high yaw while training yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate From c072a2eb60c038fc42ff40a9280b4604def41c1a Mon Sep 17 00:00:00 2001 From: NishantChandna1403 <145248794+NishantChandna1403@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:50:09 +0530 Subject: [PATCH 20/21] Update quadx_pole_waypoints_env.py --- PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py index 092a280a..54a47cd0 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py @@ -224,9 +224,6 @@ def compute_state(self) -> None: def compute_term_trunc_reward(self) -> None: """Computes the termination, truncation, and reward of the current timestep.""" super().compute_base_term_trunc_reward() - - - # bonus reward if we are not sparse if not self.sparse_reward: self.reward += max(15.0 * self.waypoints.progress_to_next_target, 0.0) From 46377fccf7b7bc032b85d20f04f0a79ad2b2f758 Mon Sep 17 00:00:00 2001 From: NishantChandna1403 Date: Sat, 1 Mar 2025 11:29:32 +0530 Subject: [PATCH 21/21] chore: apply pre-commit fixes --- PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py | 10 +++++++--- PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py | 10 +++++++--- PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py | 11 +++++++---- .../gym_envs/quadx_envs/quadx_pole_waypoints_env.py | 10 +++++++--- PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py | 10 +++++++--- 5 files changed, 35 insertions(+), 16 deletions(-) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py index bc47855b..50e268a3 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_ball_in_cup_env.py @@ -264,10 +264,14 @@ def compute_term_trunc_reward(self) -> None: # bonus reward if we are not sparse if not self.sparse_reward: # Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate = abs( + self.env.state(0)[0][2] + ) # Assuming z-axis is the last component yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + self.reward -= ( + yaw_rate_penalty # You can 
adjust the coefficient (0.01) as needed + ) + # reward for staying alive self.reward += 0.4 diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py index da387589..9b80f25f 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_hover_env.py @@ -123,10 +123,14 @@ def compute_term_trunc_reward(self) -> None: self.env.state(0)[-1] - np.array([0.0, 0.0, 1.0]) ) # Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate = abs( + self.env.state(0)[0][2] + ) # Assuming z-axis is the last component yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + self.reward -= ( + yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + ) + # how far are we from 0 roll pitch angular_distance = np.linalg.norm(self.env.state(0)[1][:2]) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py index 90a3c60b..e3a885dd 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_balance_env.py @@ -187,9 +187,12 @@ def compute_term_trunc_reward(self) -> None: self.reward -= linear_distance + angular_distance self.reward -= self.pole.leaningness self.reward += 1.0 - + # Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate = abs( + self.env.state(0)[0][2] + ) # Assuming z-axis is the last component yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + self.reward -= ( + yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + ) diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py index 54a47cd0..523f3d2e 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_pole_waypoints_env.py @@ -230,10 +230,14 @@ def compute_term_trunc_reward(self) -> None: self.reward += 0.5 / self.waypoints.distance_to_next_target self.reward += 0.5 - self.pole.leaningness # Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate = abs( + self.env.state(0)[0][2] + ) # Assuming z-axis is the last component yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + self.reward -= ( + yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + ) + # target reached if self.waypoints.target_reached: self.reward = 300.0 diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py index 2bb18f3e..8b4c7a39 100644 --- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py +++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py @@ -183,10 +183,14 @@ def compute_term_trunc_reward(self) -> None: self.reward += max(3.0 * self.waypoints.progress_to_next_target, 0.0) self.reward += 0.1 / self.waypoints.distance_to_next_target # Negative Reward For High Yaw rate, To prevent high yaw while training - yaw_rate = 
abs(self.env.state(0)[0][2]) # Assuming z-axis is the last component + yaw_rate = abs( + self.env.state(0)[0][2] + ) # Assuming z-axis is the last component yaw_rate_penalty = 0.01 * yaw_rate**2 # Add penalty for high yaw rate - self.reward -= yaw_rate_penalty # You can adjust the coefficient (0.01) as needed - + self.reward -= ( + yaw_rate_penalty # You can adjust the coefficient (0.01) as needed + ) + # target reached if self.waypoints.target_reached: self.reward = 100.0
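For reference, the shaping term that this series threads through each dense-reward branch can be read as one standalone helper. The sketch below is illustrative only and is not part of the patches: it assumes the state layout the patches themselves rely on, where env.state(0)[0] is the body angular-velocity vector with the yaw rate as its last component, and it keeps the 0.01 coefficient the patches chose, which the inline comments already flag as tunable.

    # Minimal sketch of the yaw-rate penalty added in this series (assumed
    # state layout: env.state(0)[0] = angular velocity, index 2 = yaw rate).
    import numpy as np

    def yaw_rate_penalty(ang_vel: np.ndarray, coeff: float = 0.01) -> float:
        """Quadratic penalty on the yaw-rate component of the angular velocity."""
        yaw_rate = abs(float(ang_vel[2]))  # yaw rate assumed to be the last component
        return coeff * yaw_rate**2

    # Usage inside a dense-reward branch, mirroring the patched envs:
    # self.reward -= yaw_rate_penalty(self.env.state(0)[0])

Because the penalty grows quadratically, small residual yaw rates are barely punished while sustained spinning is, which is the stated intent of the series; the coefficient trades off yaw damping against the task rewards each environment already computes.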