-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproblem1_sol.py
More file actions
89 lines (69 loc) · 3.47 KB
/
problem1_sol.py
File metadata and controls
89 lines (69 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
"""
Solution for Problem 1 of the Assignment: train or test a neural-network agent on the gym 'MountainCar-v0' environment.
"""
# ****************************************** Libraries to be imported ****************************************** #
from __future__ import print_function
import os
import gym
import argparse
import numpy as np
from tqdm import tqdm
from data_store import DataStore
from problem_1_support.nn_model_1 import NNModel
from problem_1_support.game_loop_1 import GameLoop
from problem_1_support.visualize_policy_1 import VisualizePolicy
# ****************************************** Main Program Start ****************************************** #
def main(args_):
    """
    Train or test the neural-network agent on the gym 'MountainCar-v0' environment.

    Test mode (``args_.test_run``): load the saved weights, run ``game_loop.test_episode`` once, build a
    ``VisualizePolicy`` for the model and, in verbose mode, print the episode reward.

    Training mode (default): run ``game_loop.train_episode`` for a fixed number of episodes, log each episode's
    statistics through ``nn_model.write_logs`` and finally write the per-episode rewards to 'reward.txt'.

    The environment is always closed on exit, including when an exception or Ctrl-C interrupts the run.

    :param args_: parsed command-line arguments; the attributes read here are ``test_run``, ``display``,
                  ``debug`` and ``mod_reward``
    """
    num_episodes = 1000
    env = gym.make('MountainCar-v0')
    try:
        # Seed the environment with a fixed value
        env.seed(40)
        data_store = DataStore(max_memory=10000)
        nn_model = NNModel(in_size=env.observation_space.shape[0], out_size=env.action_space.n, batch_size=64)
        game_loop = GameLoop(data_store=data_store, nn_model=nn_model, env=env)

        if args_.test_run:
            nn_model.model.load_weights("./saved_models/unmod_reward_p_1_2.h5")
            episode_cost = game_loop.test_episode(render=args_.display)
            VisualizePolicy(nn_model=nn_model)
            if args_.debug:
                print("\t Episode reward: %6.3f" % episode_cost)
        else:
            # Starting values for the running maxima reported in verbose mode
            max_ep_reward, max_pos_val = -300, -10
            reward_array = np.zeros(num_episodes, dtype=np.float32)
            for i in tqdm(range(num_episodes)):
                episode_reward, max_pos, mean_cost = game_loop.train_episode(args_.display, args_.mod_reward)
                reward_array[i] = episode_reward
                nn_model.write_logs(mean_cost, max_pos, episode_reward, i)
                if args_.debug:
                    max_ep_reward = max(max_ep_reward, episode_reward)
                    max_pos_val = max(max_pos_val, max_pos)
                    # Report only every 10th episode to keep the console output readable
                    if i % 10 == 0:
                        print("\t Maximum episode reward: %6.3f, Max Position Value: %5.2f, Last Mean Cost: %8.6f"
                              % (max_ep_reward, max_pos_val, mean_cost))
            # Rewards are saved only after training, so a test run never overwrites the file with zeros
            # noinspection PyTypeChecker
            np.savetxt('reward.txt', reward_array, delimiter=',')
    finally:
        # Release the environment (and any render window) however the run ended
        env.close()
# ****************************************** Main Program End ****************************************** #
if __name__ == '__main__':
    # Script entry point: parse the command-line flags, select CPU or GPU execution, then run main().
    # Ctrl-C at any point is reported with a short message instead of a traceback.
    try:
        argparser = argparse.ArgumentParser(
            description="Train or test a neural-network agent on the gym 'MountainCar-v0' environment")
        argparser.add_argument('-d', '--display', action='store_true', dest='display', help='render the environment')
        argparser.add_argument('-v', '--verbose', action='store_true', dest='debug', help='print debug information')
        argparser.add_argument('-m', '--mod_reward', action='store_true', dest='mod_reward', help='modify reward')
        argparser.add_argument('-t', '--test_run', action='store_true', dest='test_run', help='test run the model')
        argparser.add_argument('-g', '--gpu', action='store_true', dest='gpu', help='use gpu for neural network')
        # NOTE(review): '--write' is parsed but not read anywhere in this file; kept so existing invocations work
        argparser.add_argument('-w', '--write', action='store_true', dest='write', help='write solution file')
        args = argparser.parse_args()

        if not args.gpu:
            # Hide all CUDA devices from this process so the model runs on the CPU
            os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
            print('\n\tRunning on CPU as per user instruction.\n')
        else:
            print('\n\tRunning on GPU as per user instruction.\n')

        main(args)
    except KeyboardInterrupt:
        print('\nProcess interrupted by user. Bye!')
"""
Author: Yash Bansod
UID: 116776547
E-mail: yashb@umd.edu
Organisation: University of Maryland, College Park
"""