Skip to content

Commit ef289ac

Browse files
committed
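Fix export bug (L2-normalize the latent z in the exported policy's forward pass) and lr bug (pass the PPO learning rate to the estimator's optimizer)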
1 parent 24a20be commit ef289ac

9 files changed

Lines changed: 33 additions & 47 deletions

File tree

legged_gym/legged_gym/envs/a1/a1_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ class scales:
103103
stand_still = -0.
104104
torques = -0.0
105105
dof_vel = -0.0
106-
dof_pos_limits = -5.0
107-
dof_vel_limits = -5.0
108-
torque_limits = -5.0
106+
dof_pos_limits = -0.0
107+
dof_vel_limits = -0.0
108+
torque_limits = -0.0
109109

110110
only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
111111
tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)

legged_gym/legged_gym/envs/aliengo/aliengo_config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,18 +102,18 @@ class scales:
102102
stand_still = -0.
103103
torques = -0.0
104104
dof_vel = -0.0
105-
dof_pos_limits = -5.0
106-
dof_vel_limits = -5.0
107-
torque_limits = -5.0
105+
dof_pos_limits = 0.0
106+
dof_vel_limits = 0.0
107+
torque_limits = 0.0
108108

109109
only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
110110
tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
111111
soft_dof_pos_limit = 0.95 # percentage of urdf limits, values above this limit are penalized
112112
soft_dof_vel_limit = 0.95
113113
soft_torque_limit = 0.95
114-
base_height_target = 0.40
114+
base_height_target = 0.30
115115
max_contact_force = 100. # forces above this value are penalized
116-
clearance_height_target = -0.22
116+
clearance_height_target = -0.20
117117

118118
class AlienGoRoughCfgPPO( LeggedRobotCfgPPO ):
119119
class algorithm( LeggedRobotCfgPPO.algorithm ):

legged_gym/legged_gym/envs/base/legged_robot.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,12 @@ def update_command_curriculum(self, env_ids):
582582
Args:
583583
env_ids (List[int]): ids of environments being reset
584584
"""
585+
low_vel_env_ids = (env_ids > (self.num_envs * 0.2))
586+
high_vel_env_ids = (env_ids < (self.num_envs * 0.2))
587+
low_vel_env_ids = env_ids[low_vel_env_ids.nonzero(as_tuple=True)]
588+
high_vel_env_ids = env_ids[high_vel_env_ids.nonzero(as_tuple=True)]
585589
# If the tracking reward is above 80% of the maximum, increase the range of commands
586-
if torch.mean(self.episode_sums["tracking_lin_vel"][env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]:
590+
if (torch.mean(self.episode_sums["tracking_lin_vel"][low_vel_env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]) and (torch.mean(self.episode_sums["tracking_lin_vel"][high_vel_env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]):
587591
self.command_ranges["lin_vel_x"][0] = np.clip(self.command_ranges["lin_vel_x"][0] - 0.2, -self.cfg.commands.max_curriculum, 0.)
588592
self.command_ranges["lin_vel_x"][1] = np.clip(self.command_ranges["lin_vel_x"][1] + 0.2, 0., self.cfg.commands.max_curriculum)
589593

legged_gym/legged_gym/envs/base/legged_robot_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class terrain:
6363
num_rows= 10 # number of terrain rows (levels)
6464
num_cols = 20 # number of terrain cols (types)
6565
# terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
66-
terrain_proportions = [0.1, 0.2, 0.30, 0.30, 0.1]
66+
terrain_proportions = [0.1, 0.2, 0.3, 0.3, 0.1]
6767
# trimesh only:
6868
slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces
6969

legged_gym/legged_gym/envs/go1/go1_config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ class scales:
103103
stand_still = -0.
104104
torques = -0.0
105105
dof_vel = -0.0
106-
dof_pos_limits = -5.0
107-
dof_vel_limits = -5.0
108-
torque_limits = -5.0
106+
dof_pos_limits = -0.0
107+
dof_vel_limits = -0.0
108+
torque_limits = -0.0
109109

110110
only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
111111
tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
@@ -114,7 +114,7 @@ class scales:
114114
soft_torque_limit = 1.
115115
base_height_target = 0.30
116116
max_contact_force = 100. # forces above this value are penalized
117-
clearance_height_target = -0.2
117+
clearance_height_target = -0.20
118118

119119
class Go1RoughCfgPPO( LeggedRobotCfgPPO ):
120120
class algorithm( LeggedRobotCfgPPO.algorithm ):

legged_gym/legged_gym/scripts/export_policy.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

legged_gym/legged_gym/utils/helpers.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import random
3636
from isaacgym import gymapi
3737
from isaacgym import gymutil
38+
import torch.nn.functional as F
3839

3940
from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
4041

@@ -228,8 +229,10 @@ def __init__(self, actor_critic):
228229
self.estimator = copy.deepcopy(actor_critic.estimator.encoder)
229230

230231
def forward(self, obs_history):
231-
latent = self.estimator(obs_history)[:, 0:19]
232-
return self.actor(torch.cat((obs_history[:, 0:45], latent), dim=1))
232+
parts = self.estimator(obs_history)[:, 0:19]
233+
vel, z = parts[..., :3], parts[..., 3:]
234+
z = F.normalize(z, dim=-1, p=2.0)
235+
return self.actor(torch.cat((obs_history[:, 0:45], vel, z), dim=1))
233236

234237
def export(self, path):
235238
os.makedirs(path, exist_ok=True)

rsl_rl/rsl_rl/algorithms/him_ppo.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,6 @@ def update(self):
127127
for obs_batch, critic_obs_batch, actions_batch, next_critic_obs_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
128128
old_mu_batch, old_sigma_batch in generator:
129129

130-
#Estimator Update
131-
estimation_loss, swap_loss = self.actor_critic.estimator.update(obs_batch, next_critic_obs_batch)
132-
133-
134130
self.actor_critic.act(obs_batch)
135131
actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
136132
value_batch = self.actor_critic.evaluate(critic_obs_batch)
@@ -153,6 +149,8 @@ def update(self):
153149
for param_group in self.optimizer.param_groups:
154150
param_group['lr'] = self.learning_rate
155151

152+
#Estimator Update
153+
estimation_loss, swap_loss = self.actor_critic.estimator.update(obs_batch, next_critic_obs_batch, lr=self.learning_rate)
156154

157155
# Surrogate loss
158156
ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))

rsl_rl/rsl_rl/modules/him_estimator.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ def __init__(self,
5454
self.proto = nn.Embedding(num_prototype, enc_hidden_dims[-1])
5555

5656
# Optimizer
57-
self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
57+
self.learning_rate = learning_rate
58+
self.optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
5859

5960
def get_latent(self, obs_history):
6061
vel, z = self.encode(obs_history)
@@ -72,7 +73,12 @@ def encode(self, obs_history):
7273
z = F.normalize(z, dim=-1, p=2)
7374
return vel, z
7475

75-
def update(self, obs_history, next_critic_obs):
76+
def update(self, obs_history, next_critic_obs, lr=None):
77+
if lr is not None:
78+
self.learning_rate = lr
79+
for param_group in self.optimizer.param_groups:
80+
param_group['lr'] = self.learning_rate
81+
7682
vel = next_critic_obs[:, self.num_one_step_obs:self.num_one_step_obs+3].detach()
7783
next_obs = next_critic_obs.detach()[:, 3:self.num_one_step_obs+3]
7884

0 commit comments

Comments
 (0)