diff --git a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py
index 3e633cdc..51442ba8 100644
--- a/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py
+++ b/PyFlyt/gym_envs/quadx_envs/quadx_waypoints_env.py
@@ -182,6 +182,9 @@ def compute_term_trunc_reward(self) -> None:
         if not self.sparse_reward:
             self.reward += max(3.0 * self.waypoints.progress_to_next_target, 0.0)
             self.reward += 0.1 / self.waypoints.distance_to_next_target
+            yaw_rate = abs(self.env.state(0)[0][2])  # assumes state(0)[0] is angular velocity and index 2 is the z (yaw) axis — confirm against the state layout
+            yaw_rate_penalty = 0.01 * yaw_rate**2  # quadratic penalty on yaw rate; the 0.01 coefficient is tunable
+            self.reward -= yaw_rate_penalty

         # target reached
         if self.waypoints.target_reached: