Fix policy export (L2-normalize the latent) and sync the estimator learning rate with PPO

Junfeng-Long · Junfeng-Long · commit ef289acaa627 · 2024-05-14T22:42:18.000+08:00
diff --git a/legged_gym/legged_gym/envs/a1/a1_config.py b/legged_gym/legged_gym/envs/a1/a1_config.py
@@ -103,9 +103,9 @@ class scales:
             stand_still = -0.
             torques = -0.0
             dof_vel = -0.0
-            dof_pos_limits = -5.0
-            dof_vel_limits = -5.0
-            torque_limits = -5.0
+            dof_pos_limits = -0.0
+            dof_vel_limits = -0.0
+            torque_limits = -0.0
 
         only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
         tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
diff --git a/legged_gym/legged_gym/envs/aliengo/aliengo_config.py b/legged_gym/legged_gym/envs/aliengo/aliengo_config.py
@@ -102,18 +102,18 @@ class scales:
             stand_still = -0.
             torques = -0.0
             dof_vel = -0.0
-            dof_pos_limits = -5.0
-            dof_vel_limits = -5.0
-            torque_limits = -5.0
+            dof_pos_limits = 0.0
+            dof_vel_limits = 0.0
+            torque_limits = 0.0
 
         only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
         tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
         soft_dof_pos_limit = 0.95 # percentage of urdf limits, values above this limit are penalized
         soft_dof_vel_limit = 0.95
         soft_torque_limit = 0.95
-        base_height_target = 0.40
+        base_height_target = 0.30
         max_contact_force = 100. # forces above this value are penalized
-        clearance_height_target = -0.22
+        clearance_height_target = -0.20
 
 class AlienGoRoughCfgPPO( LeggedRobotCfgPPO ):
     class algorithm( LeggedRobotCfgPPO.algorithm ):
diff --git a/legged_gym/legged_gym/envs/base/legged_robot.py b/legged_gym/legged_gym/envs/base/legged_robot.py
@@ -582,8 +582,12 @@ def update_command_curriculum(self, env_ids):
         Args:
             env_ids (List[int]): ids of environments being reset
         """
+        low_vel_env_ids = (env_ids > (self.num_envs * 0.2))
+        high_vel_env_ids = (env_ids < (self.num_envs * 0.2))
+        low_vel_env_ids = env_ids[low_vel_env_ids.nonzero(as_tuple=True)]
+        high_vel_env_ids = env_ids[high_vel_env_ids.nonzero(as_tuple=True)]
         # If the tracking reward is above 80% of the maximum, increase the range of commands
-        if torch.mean(self.episode_sums["tracking_lin_vel"][env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]:
+        if (torch.mean(self.episode_sums["tracking_lin_vel"][low_vel_env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]) and (torch.mean(self.episode_sums["tracking_lin_vel"][high_vel_env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]):
             self.command_ranges["lin_vel_x"][0] = np.clip(self.command_ranges["lin_vel_x"][0] - 0.2, -self.cfg.commands.max_curriculum, 0.)
             self.command_ranges["lin_vel_x"][1] = np.clip(self.command_ranges["lin_vel_x"][1] + 0.2, 0., self.cfg.commands.max_curriculum)
 
diff --git a/legged_gym/legged_gym/envs/base/legged_robot_config.py b/legged_gym/legged_gym/envs/base/legged_robot_config.py
@@ -63,7 +63,7 @@ class terrain:
         num_rows= 10 # number of terrain rows (levels)
         num_cols = 20 # number of terrain cols (types)
         # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
-        terrain_proportions = [0.1, 0.2, 0.30, 0.30, 0.1]
+        terrain_proportions = [0.1, 0.2, 0.3, 0.3, 0.1]
         # trimesh only:
         slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces
 
diff --git a/legged_gym/legged_gym/envs/go1/go1_config.py b/legged_gym/legged_gym/envs/go1/go1_config.py
@@ -103,9 +103,9 @@ class scales:
             stand_still = -0.
             torques = -0.0
             dof_vel = -0.0
-            dof_pos_limits = -5.0
-            dof_vel_limits = -5.0
-            torque_limits = -5.0
+            dof_pos_limits = -0.0
+            dof_vel_limits = -0.0
+            torque_limits = -0.0
 
         only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
         tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
@@ -114,7 +114,7 @@ class scales:
         soft_torque_limit = 1.
         base_height_target = 0.30
         max_contact_force = 100. # forces above this value are penalized
-        clearance_height_target = -0.2
+        clearance_height_target = -0.20
 
 class Go1RoughCfgPPO( LeggedRobotCfgPPO ):
     class algorithm( LeggedRobotCfgPPO.algorithm ):
diff --git a/legged_gym/legged_gym/scripts/export_policy.py b/legged_gym/legged_gym/scripts/export_policy.py
diff --git a/legged_gym/legged_gym/utils/helpers.py b/legged_gym/legged_gym/utils/helpers.py
@@ -35,6 +35,7 @@
 import random
 from isaacgym import gymapi
 from isaacgym import gymutil
+import torch.nn.functional as F
 
 from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
 
@@ -228,8 +229,10 @@ def __init__(self, actor_critic):
         self.estimator = copy.deepcopy(actor_critic.estimator.encoder)
 
     def forward(self, obs_history):
-        latent = self.estimator(obs_history)[:, 0:19]
-        return self.actor(torch.cat((obs_history[:, 0:45], latent), dim=1))
+        parts = self.estimator(obs_history)[:, 0:19]
+        vel, z = parts[..., :3], parts[..., 3:]
+        z = F.normalize(z, dim=-1, p=2.0)
+        return self.actor(torch.cat((obs_history[:, 0:45], vel, z), dim=1))
 
     def export(self, path):
         os.makedirs(path, exist_ok=True)
diff --git a/rsl_rl/rsl_rl/algorithms/him_ppo.py b/rsl_rl/rsl_rl/algorithms/him_ppo.py
@@ -127,10 +127,6 @@ def update(self):
         for obs_batch, critic_obs_batch, actions_batch, next_critic_obs_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
             old_mu_batch, old_sigma_batch in generator:
                 
-                #Estimator Update
-                estimation_loss, swap_loss = self.actor_critic.estimator.update(obs_batch, next_critic_obs_batch)
-
-
                 self.actor_critic.act(obs_batch)
                 actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
                 value_batch = self.actor_critic.evaluate(critic_obs_batch)
@@ -153,6 +149,8 @@ def update(self):
                         for param_group in self.optimizer.param_groups:
                             param_group['lr'] = self.learning_rate
 
+                # Estimator update: done after the learning-rate adjustment above so it uses the current self.learning_rate
+                estimation_loss, swap_loss = self.actor_critic.estimator.update(obs_batch, next_critic_obs_batch, lr=self.learning_rate)
 
                 # Surrogate loss
                 ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
diff --git a/rsl_rl/rsl_rl/modules/him_estimator.py b/rsl_rl/rsl_rl/modules/him_estimator.py
@@ -54,7 +54,8 @@ def __init__(self,
         self.proto = nn.Embedding(num_prototype, enc_hidden_dims[-1])
 
         # Optimizer
-        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
+        self.learning_rate = learning_rate
+        self.optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
 
     def get_latent(self, obs_history):
         vel, z = self.encode(obs_history)
@@ -72,7 +73,12 @@ def encode(self, obs_history):
         z = F.normalize(z, dim=-1, p=2)
         return vel, z
 
-    def update(self, obs_history, next_critic_obs):
+    def update(self, obs_history, next_critic_obs, lr=None):
+        if lr is not None:
+            self.learning_rate = lr
+            for param_group in self.optimizer.param_groups:
+                param_group['lr'] = self.learning_rate
+                
         vel = next_critic_obs[:, self.num_one_step_obs:self.num_one_step_obs+3].detach()
         next_obs = next_critic_obs.detach()[:, 3:self.num_one_step_obs+3]