11import argparse
2- import gym
2+ import gymnasium as gym
33import os
44import threading
55import time
@@ -68,7 +68,7 @@ class Observer:
6868 def __init__ (self , batch = True ):
6969 self .id = rpc .get_worker_info ().id - 1
7070 self .env = gym .make ('CartPole-v1' )
71- self .env .seed ( args .seed )
71+ self .env .reset ( seed = args .seed )
7272 self .select_action = Agent .select_action_batch if batch else Agent .select_action
7373
7474 def run_episode (self , agent_rref , n_steps ):
@@ -92,10 +92,10 @@ def run_episode(self, agent_rref, n_steps):
9292 )
9393
9494 # apply the action to the environment, and get the reward
95- state , reward , done , _ = self .env .step (action )
95+ state , reward , terminated , truncated , _ = self .env .step (action )
9696 rewards [step ] = reward
9797
98- if done or step + 1 >= n_steps :
98+ if terminated or truncated or step + 1 >= n_steps :
9999 curr_rewards = rewards [start_step :(step + 1 )]
100100 R = 0
101101 for i in range (curr_rewards .numel () - 1 , - 1 , - 1 ):
@@ -226,8 +226,7 @@ def run_worker(rank, world_size, n_episode, batch, print_log=True):
226226 last_reward , running_reward = agent .run_episode (n_steps = NUM_STEPS )
227227
228228 if print_log :
229- print ('Episode {}\t Last reward: {:.2f}\t Average reward: {:.2f}' .format (
230- i_episode , last_reward , running_reward ))
229+ print (f'Episode { i_episode } \t Last reward: { last_reward :.2f} \t Average reward: { running_reward :.2f} ' )
231230 else :
232231 # other ranks are the observer
233232 rpc .init_rpc (OBSERVER_NAME .format (rank ), rank = rank , world_size = world_size )
0 commit comments