diff --git a/envs/__pycache__/simple_biped_env.cpython-312.pyc b/envs/__pycache__/simple_biped_env.cpython-312.pyc index 673bc88..2af9ab6 100644 Binary files a/envs/__pycache__/simple_biped_env.cpython-312.pyc and b/envs/__pycache__/simple_biped_env.cpython-312.pyc differ diff --git a/envs/__pycache__/simple_biped_env.cpython-313.pyc b/envs/__pycache__/simple_biped_env.cpython-313.pyc new file mode 100644 index 0000000..ce476d5 Binary files /dev/null and b/envs/__pycache__/simple_biped_env.cpython-313.pyc differ diff --git a/envs/simple_biped_env.py b/envs/simple_biped_env.py index 9b5b918..a9345f6 100644 --- a/envs/simple_biped_env.py +++ b/envs/simple_biped_env.py @@ -9,20 +9,50 @@ def __init__(self, render=False, max_steps=10000): super(simpleBipedEnv, self).__init__() self.render_mode = render self.physicsClient = p.connect(p.GUI if render else p.DIRECT) - self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0]) p.setAdditionalSearchPath(pybullet_data.getDataPath()) + self.robot_id = p.loadURDF("walkers/simple_biped.urdf", [0, 0, 1.0]) + self.plane_id = p.loadURDF("plane.urdf") + + # Find foot link indices (adjust names to match your URDF!) + self.left_foot = 1 # replace with actual index + self.right_foot = 3 # replace with actual index - num_joints = 4 # left_hip, right_hip, left_knee, right_knee + num_joints = 4 self.action_space = spaces.Box(low=-1, high=1, shape=(num_joints,), dtype=np.float32) self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2*num_joints + 6,), dtype=np.float32) self.max_steps = max_steps self.current_step = 0 self.last_action = None + self.off_ground_counter = 0 # track consecutive steps with no contacts + + def _feet_in_contact(self): + left_contacts = p.getContactPoints(self.robot_id, self.plane_id, linkIndexA=self.left_foot) + right_contacts = p.getContactPoints(self.robot_id, self.plane_id, linkIndexA=self.right_foot) + return len(left_contacts) > 0, len(right_contacts) > 0 + + def _compute_reward(self): + base_pos, _ = p.getBasePositionAndOrientation(self.robot_id) + forward_reward = base_pos[0] + alive_bonus = 1 if not self._check_termination() else -500 + + # --- Penalize if both feet off ground for too long --- + left_contact, right_contact = self._feet_in_contact() + if not left_contact and not right_contact: + self.off_ground_counter += 1 + else: + self.off_ground_counter = 0 + + airborne_penalty = -10 if self.off_ground_counter > 500 else 0 # tune threshold & penalty + + return forward_reward + alive_bonus + airborne_penalty def reset(self, seed=None, options=None): p.resetBasePositionAndOrientation(self.robot_id, [0,0,1], [0,0,0,1]) + num_joints = p.getNumJoints(self.robot_id) + for j in range(num_joints): + p.resetJointState(self.robot_id, j, targetValue=0.0, targetVelocity=0.0) p.setGravity(0, 0, -9.8) plane_id = p.loadURDF("plane.urdf") base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] @@ -45,7 +75,7 @@ def step(self, action): current_positions = np.array([state[0] for state in joint_states], dtype=np.float32) # Apply relative change to joint angles - delta = action * 0.05 # scale step size as needed + delta = action * 0.2 # scale step size as needed target_positions = current_positions + delta p.setJointMotorControlArray( @@ -78,22 +108,6 @@ def _check_termination(self): base_pos = p.getBasePositionAndOrientation(self.robot_id)[0] return base_pos[2] < 0.2 or base_pos[2] > 1.5 # fallen - def _compute_reward(self): - base_pos, _ = p.getBasePositionAndOrientation(self.robot_id) - base_vel, _ = p.getBaseVelocity(self.robot_id) - - forward_reward = base_vel[0] # reward x velocity - if not self._check_termination(): - alive_bonus = 1 - else: - alive_bonus = -500 - torque_penalty = 0.001 * np.sum(np.square(self.last_action)) - - #print(forward_reward, alive_bonus, torque_penalty) - - return forward_reward + alive_bonus - torque_penalty - - def render(self): if self.render_mode: if hasattr(self, 'robot_id'): diff --git a/envs/test.py b/envs/test.py index c238b28..004d811 100644 --- a/envs/test.py +++ b/envs/test.py @@ -15,15 +15,21 @@ max_episode_steps=10000, ) -def train(): +def train_PPO(): LOG_DIR = "./logs/" env = gym.make('simpleBiped-v0', render=False) - model = DDPG("MlpPolicy", env, verbose=1, device="auto", tensorboard_log=LOG_DIR) - model.learn(total_timesteps=10000000) + model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.001, device="auto", tensorboard_log=LOG_DIR) + model.learn(total_timesteps=500000) model.save("../models/humanoid_ppo") env.close() - +def train_DDPG(): + LOG_DIR = "./logs/" + env = gym.make('simpleBiped-v0', render_mode=True) + model = DDPG("MlpPolicy", env, verbose=1, device="auto", tensorboard_log=LOG_DIR) + model.learn(total_timesteps=10000000) + model.save("../models/humanoid_ddpg") + env.close() def run(): env = DummyVecEnv([lambda: simpleBipedEnv(render=True)]) @@ -58,6 +64,7 @@ def test(): print(f"Test episode finished. Total reward: {total_reward}") env.close() -train() +train_PPO() +#train_DDPG() #run() #test() diff --git a/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 b/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 deleted file mode 100644 index 6d5134e..0000000 Binary files a/logs/DDPG_1/events.out.tfevents.1756720705.LAPTOP-U4CU9OKR.12120.0 and /dev/null differ diff --git a/logs/DDPG_1/events.out.tfevents.1756729277.DESKTOP-Q8AMD27.35468.0 b/logs/DDPG_1/events.out.tfevents.1756729277.DESKTOP-Q8AMD27.35468.0 new file mode 100644 index 0000000..efb7b52 Binary files /dev/null and b/logs/DDPG_1/events.out.tfevents.1756729277.DESKTOP-Q8AMD27.35468.0 differ diff --git a/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 b/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 deleted file mode 100644 index 2009ec2..0000000 Binary files a/logs/PPO_1/events.out.tfevents.1756717278.LAPTOP-U4CU9OKR.26564.0 and /dev/null differ diff --git a/logs/PPO_1/events.out.tfevents.1756729290.DESKTOP-Q8AMD27.4012.0 b/logs/PPO_1/events.out.tfevents.1756729290.DESKTOP-Q8AMD27.4012.0 new file mode 100644 index 0000000..b424fd5 Binary files /dev/null and b/logs/PPO_1/events.out.tfevents.1756729290.DESKTOP-Q8AMD27.4012.0 differ diff --git a/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 b/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 deleted file mode 100644 index 524c52e..0000000 Binary files a/logs/PPO_2/events.out.tfevents.1756718026.LAPTOP-U4CU9OKR.25076.0 and /dev/null differ diff --git a/logs/PPO_2/events.out.tfevents.1756784683.DESKTOP-Q8AMD27.39556.0 b/logs/PPO_2/events.out.tfevents.1756784683.DESKTOP-Q8AMD27.39556.0 new file mode 100644 index 0000000..1c295e9 Binary files /dev/null and b/logs/PPO_2/events.out.tfevents.1756784683.DESKTOP-Q8AMD27.39556.0 differ diff --git a/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 b/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 deleted file mode 100644 index a6a66f7..0000000 Binary files a/logs/PPO_3/events.out.tfevents.1756718088.LAPTOP-U4CU9OKR.12380.0 and /dev/null differ diff --git a/logs/PPO_3/events.out.tfevents.1757319344.LAPTOP-U4CU9OKR.24828.0 b/logs/PPO_3/events.out.tfevents.1757319344.LAPTOP-U4CU9OKR.24828.0 new file mode 100644 index 0000000..6378fb9 Binary files /dev/null and b/logs/PPO_3/events.out.tfevents.1757319344.LAPTOP-U4CU9OKR.24828.0 differ diff --git a/logs/PPO_4/events.out.tfevents.1757320046.LAPTOP-U4CU9OKR.8248.0 b/logs/PPO_4/events.out.tfevents.1757320046.LAPTOP-U4CU9OKR.8248.0 new file mode 100644 index 0000000..8834ecc Binary files /dev/null and b/logs/PPO_4/events.out.tfevents.1757320046.LAPTOP-U4CU9OKR.8248.0 differ diff --git a/logs/PPO_5/events.out.tfevents.1757322660.LAPTOP-U4CU9OKR.30516.0 b/logs/PPO_5/events.out.tfevents.1757322660.LAPTOP-U4CU9OKR.30516.0 new file mode 100644 index 0000000..b499a5a Binary files /dev/null and b/logs/PPO_5/events.out.tfevents.1757322660.LAPTOP-U4CU9OKR.30516.0 differ diff --git a/requirements.txt b/requirements.txt index 3c4b22b..da7156d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ gymnasium==0.29.1 pybullet==3.2.5 stable-baselines3==2.2.1 numpy>=1.24.0 -matplotlib>=3.7.0 \ No newline at end of file +matplotlib>=3.7.0 +tensorboard==2.20.0 \ No newline at end of file