-
-
Notifications
You must be signed in to change notification settings - Fork 96
Expand file tree
/
Copy pathmain.cpp
More file actions
141 lines (113 loc) · 4.63 KB
/
main.cpp
File metadata and controls
141 lines (113 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// C++ API - OPENAIGYM
// #include "binding-cpp/include/gym/gym.h"
#include "iostream"
#include <boost/make_shared.hpp>
#include "ddpg_model.h"
#include <torch/torch.h>
#include "agent.h"
/**
 * Formats a duration expressed in seconds as "H:MM:SS".
 *
 * Hours are printed without padding; minutes are zero-padded to two
 * characters. The leftover seconds are streamed as a double with a minimum
 * width of two and '0' as fill, so whole values come out as "05" while a
 * fractional remainder is printed with the stream's default formatting.
 *
 * @param remaining duration in seconds
 * @return the formatted duration string
 */
std::string getTimeString(double remaining) {
    // Peel off the whole hours first; the conversion truncates.
    const int wholeHours = static_cast<int>(remaining / 3600);
    remaining -= 3600 * wholeHours;

    // Then the whole minutes; what is left in `remaining` is the seconds part.
    const int wholeMinutes = static_cast<int>(remaining / 60);
    remaining -= 60 * wholeMinutes;

    std::stringstream formatted;
    formatted << wholeHours << ":";
    // The fill character is sticky, the width only applies to the next field.
    formatted << std::setfill('0') << std::setw(2) << wholeMinutes << ":";
    formatted << std::setw(2) << remaining;
    return formatted.str();
}
/************** TESTING **********************/
/**
 * Runs one rendered evaluation episode with a previously saved agent.
 *
 * Loads the checkpoint identified by checkPointNumber into the agent, resets
 * the environment and keeps stepping with the agent's actions until the
 * environment reports `done`. The accumulated reward is printed after every
 * step, including the terminal one.
 *
 * @param env              environment to evaluate in
 * @param agent            agent whose checkpoint is loaded and queried for actions
 * @param checkPointNumber identifier passed to Agent::loadCheckPoints
 */
void test_environment(const boost::shared_ptr<Gym::Environment>& env, Agent& agent, int checkPointNumber )
{
    agent.loadCheckPoints(checkPointNumber);

    Gym::State s;
    env->reset(&s);

    float total_reward = 0;
    while (true) {
        // NOTE(review): the `false` flag presumably disables exploration noise
        // for evaluation - confirm against Agent::act.
        auto action = agent.act(s.observation, false);
        env->step(action, /*render=*/true, &s);
        // assert(s.observation.size()==observation_space->sample().size());
        total_reward += s.reward;

        // Report before checking for termination so the reward of the final
        // step is not silently dropped from the output.
        std::cout << "Average Score:\t" << total_reward << std::endl;
        if (s.done) {
            break;
        }
    }
}
/************** TRAINING **********************/
/**
 * Trains the agent for a number of episodes.
 *
 * Each episode resets the environment and runs at most 300 steps (or until the
 * environment reports `done`), feeding every transition to Agent::step.
 * Episode scores are kept in a 100-entry circular buffer; every 10 episodes the
 * mean of that buffer, the current score, the step count and elapsed times are
 * printed. A checkpoint is saved five times over the run (every
 * episodes_to_run / 5 episodes); for runs shorter than five episodes a
 * checkpoint is saved after every episode.
 *
 * @param env             environment to train in
 * @param agent           agent that selects actions and learns from transitions
 * @param renderEnv       forwarded to Environment::step as the render flag
 * @param episodes_to_run number of episodes to run; nothing happens when <= 0
 */
void train_environment(const boost::shared_ptr<Gym::Environment> env, Agent& agent,
                       bool renderEnv, int episodes_to_run = 1)
{
    constexpr int kMaxStepsPerEpisode = 300;
    constexpr int kReportInterval = 10;
    // Guard against a zero interval: episodes_to_run / 5 is 0 for fewer than
    // five episodes, and `e % 0` is undefined behaviour.
    const int checkpoint_interval = episodes_to_run >= 5 ? episodes_to_run / 5 : 1;

    const auto startTime = std::chrono::system_clock::now();
    auto lastReportTime = startTime;
    boost::circular_buffer<float> scoreBuffer{100};

    for (int e = 1; e <= episodes_to_run; e++)
    {
        Gym::State s;
        env->reset(&s);
        float total_reward = 0;
        int total_steps = 0;

        for (int i = 0; i < kMaxStepsPerEpisode; i++) {
            total_steps++;
            // Only the observation of the previous state is needed for the
            // transition, so keep just that rather than copying the whole state.
            const auto previous_observation = s.observation;
            auto action = agent.act(s.observation, true);
            env->step(action, renderEnv, &s);
            // assert(s.observation.size()==observation_space->sample().size());
            total_reward += s.reward;
            agent.step(previous_observation, action, s.reward, s.observation, s.done);
            if (s.done) {
                break;
            }
        }
        scoreBuffer.push_back(total_reward);

        if (e % checkpoint_interval == 0) {
            std::cout << "****************** Checkpoint saved: " << e << "Episodes *******************" << std::endl;
            agent.saveCheckPoints(e);
        }

        if (e % kReportInterval == 0) {
            // The buffer holds at least one score here, so the division is safe.
            const auto avg_mean =
                std::accumulate(scoreBuffer.begin(), scoreBuffer.end(), 0.0) / scoreBuffer.size();
            std::cout << "Episode:\t" << e << "\t\tAverage Score:\t" << avg_mean << "\t\tCurrent Score:\t" << total_reward <<
                         "\t\tEnv steps:\t" << total_steps << std::endl;

            // Sample the clock at report time so the episode that just finished
            // is included in the elapsed figures.
            const auto now = std::chrono::system_clock::now();
            const auto total_time = getTimeString(
                std::chrono::duration_cast<std::chrono::seconds>(now - startTime).count());
            // NOTE(review): the label says "100 Steps" but this is the time
            // since the previous report, i.e. the last 10 episodes.
            const auto episode_time = getTimeString(
                std::chrono::duration_cast<std::chrono::seconds>(now - lastReportTime).count());
            std::cout << "Total:\t" << total_time << "\t\t100 Steps:\t" << episode_time << std::endl;
            lastReportTime = now;
        }
    }
}
/**
 * Program entry point.
 *
 * Connects to a Gym HTTP server on 127.0.0.1:5000, creates the "Pendulum-v0"
 * environment, derives the action and state sizes from its spaces, builds an
 * Agent and then either trains it (10000 episodes, no rendering) or evaluates
 * checkpoint 100, depending on the `training` flag.
 *
 * @return 0 on success, 1 if a std::exception escaped the run
 */
int main() {
    const bool training = true;
    try {
        boost::shared_ptr<Gym::Client> client = Gym::client_create("127.0.0.1", 5000);
        boost::shared_ptr<Gym::Environment> env = client->make("Pendulum-v0");

        // Size of a space: number of choices for a discrete space, otherwise
        // the first dimension of the box shape.
        const auto space_size = [](const auto& space) -> int {
            if (space->type == Gym::Space::SpaceType::DISCRETE) {
                return space->discreet_n;
            }
            return space->box_shape[0];  // CONTINUOUS
        };

        // Query each space once instead of re-fetching it for every field.
        const int action_size = space_size(env->action_space());
        const int state_size = space_size(env->observation_space());

        // NOTE(review): the third constructor argument (2) is not explained in
        // this file - presumably a random seed; confirm against Agent.
        auto agent = Agent(state_size , action_size, 2);
        std::cout << "(main.cpp) state_size = " << state_size << std::endl;
        std::cout << "(main.cpp) action_size = " << action_size << std::endl;

        if (training)
            train_environment(env, agent, /*render*/ false, 10000);
        else
            test_environment(env, agent, 100);
    } catch (const std::exception& e) {
        // std::cerr comes from the already-included iostream header.
        std::cerr << "ERROR: " << e.what() << "\n";
        return 1;
    }
    return 0;
}