@@ -163,38 +163,38 @@ If your callback returns False, training is aborted early.
163163 from stable_baselines.results_plotter import load_results, ts2xy
164164 from stable_baselines import DDPG
165165 from stable_baselines.ddpg import AdaptiveParamNoiseSpec
166+ from stable_baselines import results_plotter
166167
167168
168169 best_mean_reward, n_steps = - np.inf, 0
169170
170171 def callback (_locals , _globals ):
171- """
172- Callback called at each step (for DQN an others) or after n steps (see ACER or PPO2)
173- :param _locals: (dict)
174- :param _globals: (dict)
175- """
176- global n_steps, best_mean_reward
177- # Print stats every 1000 calls
178- if (n_steps + 1 ) % 1000 == 0 :
179- # Evaluate policy training performance
180- x, y = ts2xy(load_results(log_dir), ' timesteps' )
181- if len (x) > 0 :
182- mean_reward = np.mean(y[- 100 :])
183- print (x[- 1 ], ' timesteps' )
184- print (" Best mean reward: {:.2f } - Last mean reward per episode: {:.2f } " .format(best_mean_reward, mean_reward))
185-
186- # New best model, you could save the agent here
187- if mean_reward > best_mean_reward:
188- best_mean_reward = mean_reward
189- # Example for saving best model
190- print (" Saving new best model" )
191- _locals[' self' ].save(log_dir + ' best_model.pkl' )
192- n_steps += 1
193- return True
194-
172+ """
173+ Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
174+ :param _locals: (dict)
175+ :param _globals: (dict)
176+ """
177+ global n_steps, best_mean_reward
178+ # Print stats every 1000 calls
179+ if (n_steps + 1 ) % 1000 == 0 :
180+ # Evaluate policy training performance
181+ x, y = ts2xy(load_results(log_dir), 'timesteps')
182+ if len (x) > 0 :
183+ mean_reward = np.mean(y[- 100 :])
184+ print(x[-1], 'timesteps')
185+ print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(best_mean_reward, mean_reward))
186+
187+ # New best model, you could save the agent here
188+ if mean_reward > best_mean_reward:
189+ best_mean_reward = mean_reward
190+ # Example for saving best model
191+ print (" Saving new best model" )
192+ _locals['self'].save(log_dir + 'best_model.pkl')
193+ n_steps += 1
194+ return True
195195
196196 # Create log dir
197- log_dir = " / tmp/gym /"
197+ log_dir = "tmp/"
198198 os.makedirs(log_dir, exist_ok = True )
199199
200200 # Create and wrap the environment
@@ -206,7 +206,11 @@ If your callback returns False, training is aborted early.
206206 # Because we use parameter noise, we should use a MlpPolicy with layer normalization
207207 model = DDPG(LnMlpPolicy, env, param_noise = param_noise, verbose = 0 )
208208 # Train the agent
209- model.learn(total_timesteps = int (1e5 ), callback = callback)
209+ time_steps = 1e5
210+ model.learn(total_timesteps = int (time_steps), callback = callback)
211+
212+ results_plotter.plot_results([log_dir], time_steps, results_plotter.X_TIMESTEPS, "DDPG LunarLander")
213+ plt.show()
210214
211215
212216 Atari Games
0 commit comments