Skip to content

Commit 5b6f3d2

Browse files
committed
Final version of RL model
1 parent 701a18e commit 5b6f3d2

6 files changed

Lines changed: 78 additions & 113 deletions

File tree

scripts/Chris/DQN/Environment.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ def __init__(self, width, height):
2424
self.agent_cell = self.maze.start_cell
2525
self.num_actions = 4
2626
self.history = [(self.agent_cell.coordinates, 0, False, {})] # (state, reward, done, info)
27-
self.pos_history = []
2827

2928
def plot(self):
3029
# Box around maze
@@ -53,8 +52,7 @@ def plot(self):
5352

5453
def reset(self):
5554
self.agent_cell = self.maze.start_cell
56-
self.step_history = []
57-
self.pos_history = []
55+
self.history = [(self.agent_cell.coordinates, 0, False, {})]
5856
return self.agent_cell, {}
5957

6058
# Takes action
@@ -72,14 +70,14 @@ def step(self, action):
7270

7371
# Check if action runs into wall
7472
if action not in self.agent_cell.open_walls:
75-
self.history.append((self.agent_cell.coordinates, -0.5, False, {}))
76-
return self.agent_cell, -.5, False, {}
73+
self.history.append((self.agent_cell.coordinates, -0.1, False, {}))
74+
return self.agent_cell, -0.01, False, {}
7775

7876
# Move agent
7977
else:
8078
self.agent_cell = self.maze.neighbor(self.agent_cell, action)
8179
if self.agent_cell == self.maze.end_cell: # Check if agent has reached the end
82-
self.history.append(self.agent_cell.coordinates, 1, True, {})
80+
self.history.append((self.agent_cell.coordinates, 1, True, {}))
8381
return self.agent_cell, 1, True, {}
8482
else:
8583
self.history.append((self.agent_cell.coordinates, 0, False, {}))
@@ -89,14 +87,14 @@ def save(self, filename):
8987
with open(filename, 'wb') as f:
9088
pkl.dump(self, f)
9189

92-
def animate_history(self):
90+
def animate_history(self, file_name='maze.gif'):
9391
def update(i):
9492
plt.clf()
9593
self.plot()
9694
plt.plot(self.history[i][0][1], self.history[i][0][0], 'yo')
9795
plt.title(f'Step {i}, Reward: {self.history[i][1]}')
9896
ani = FuncAnimation(plt.gcf(), update, frames=len(self.history), repeat=False)
99-
ani.save('maze.gif', writer='ffmpeg', fps=10)
97+
ani.save(file_name, writer='ffmpeg', fps=5)
10098

10199
class Grid_Cell_Maze_Environment(Maze_Environment):
102100
def __init__(self, width, height):
@@ -132,7 +130,7 @@ def state_to_grid_cell_spikes(self, cell):
132130
target_net = DQN(input_size, n_actions).to(device)
133131
target_net.load_state_dict(policy_net.state_dict())
134132
optimizer = optim.AdamW(policy_net.parameters(), lr=lr, amsgrad=True)
135-
memory = ReplayMemory(10000)
133+
memory = ReplayMemory(1000)
136134
env = Grid_Cell_Maze_Environment(width=5, height=5)
137135

138136
run_episode(env, policy_net, 'cpu', 100, eps=0.9)

scripts/Chris/DQN/Eval.ipynb

Lines changed: 9 additions & 48 deletions
Large diffs are not rendered by default.

scripts/Chris/DQN/pipeline_executor.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,19 @@
1212
## Constants ##
1313
WIDTH = 5
1414
HEIGHT = 5
15-
SAMPLES_PER_POS = 5000
15+
SAMPLES_PER_POS = 10
1616
NOISE = 0.1 # Noise in sampling
1717
WINDOW_FREQ = 10
1818
WINDOW_SIZE = 10
1919
NUM_CELLS = 20
20-
X_RANGE = (0, 5)
21-
Y_RANGE = (0, 5)
20+
X_RANGE = (0, WIDTH)
21+
Y_RANGE = (0, HEIGHT)
2222
SIM_TIME = 50
2323
MAX_SPIKE_FREQ = 0.8
2424
GC_MULTIPLES = 1
2525
EXC_SIZE = 250
2626
INH_SIZE = 50
27-
STORE_SAMPLES = 100
27+
STORE_SAMPLES = 0
2828
WINDOW_FREQ = 10
2929
WINDOW_SIZE = 10
3030
OUT_DIM = 2
@@ -66,28 +66,30 @@
6666
# # Spike Train Generation ##
6767
# spike_trains, labels, sorted_spike_trains = spike_train_generator(samples, labels, SIM_TIME, GC_MULTIPLES, MAX_SPIKE_FREQ)
6868
#
69-
# # ## Association (Store) ##
69+
# ## Association (Store) ##
7070
# store_reservoir(EXC_SIZE, INH_SIZE, STORE_SAMPLES, NUM_CELLS, GC_MULTIPLES, SIM_TIME, hyper_params, PLOT)
7171
#
7272
# # ## Association (Recall) ##
7373
# recall_reservoir(EXC_SIZE, INH_SIZE, SIM_TIME, PLOT)
7474
#
7575
# # Preprocess Recalls ##
76-
# recalled_mem_preprocessing(WINDOW_FREQ, WINDOW_SIZE, PLOT)
76+
# recalled_mem_preprocessing(WIDTH, HEIGHT, PLOT)
7777

7878
## Train DQN ##
7979
LR = 0.01
8080
EPS_START = 0.9
8181
EPS_END = 0.05
82-
EPS_DECAY = 1000
82+
DECAY_INTENSITY = 3 # higher value -> faster epsilon decay over the training run
8383
TAU = 0.005
8484
GAMMA = 0.99
85-
MAX_STEPS_PER_EP = 10
86-
MAX_TOTAL_STEPS = 10
87-
MAX_EPS = 3000
88-
BATCH_SIZE = 128
85+
MAX_STEPS_PER_EP = 100
86+
MAX_TOTAL_STEPS = 15000
87+
MAX_EPS = 500
88+
BATCH_SIZE = 256
8989
INPUT_SIZE = EXC_SIZE + INH_SIZE
90-
train_DQN(INPUT_SIZE, LR, BATCH_SIZE, EPS_START, EPS_END, EPS_DECAY, TAU, GAMMA, MAX_STEPS_PER_EP, MAX_TOTAL_STEPS, MAX_EPS)
90+
train_DQN(INPUT_SIZE, WIDTH, HEIGHT, LR, BATCH_SIZE, EPS_START,
91+
EPS_END, DECAY_INTENSITY, TAU, GAMMA, MAX_STEPS_PER_EP,
92+
MAX_TOTAL_STEPS, MAX_EPS, PLOT)
9193

9294
## Train ANN ##
9395
# classify_recalls(OUT_DIM, TRAIN_RATIO, BATCH_SIZE, TRAIN_EPOCHS)

scripts/Chris/DQN/recall_reservoir.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,25 @@ def recall_reservoir(exc_size, inh_size, sim_time, plot=False):
3232
pkl.dump(recalled_memories_sorted, f)
3333

3434
# Plot recalls
35-
if plot:
36-
positions = np.array([key for key in recalled_memories_sorted.keys()])
37-
rand_inds = np.random.choice(range(len(positions)), 5)
38-
for pos in positions[rand_inds]:
39-
fig = plt.figure(figsize=(10, 3))
40-
gs = fig.add_gridspec(1, 6)
41-
ax1 = fig.add_subplot(gs[0, 0])
42-
ax1.set_title(f"Position: {pos}")
43-
avg_mem = np.mean(recalled_memories_sorted[tuple(pos)], axis=0)
44-
ax1.imshow(avg_mem.T)
45-
random_inds = np.random.choice(range(len(recalled_memories_sorted[tuple(pos)])), 5)
46-
random_samples = np.array(recalled_memories_sorted[tuple(pos)])[random_inds]
47-
vmin = np.min(random_samples)
48-
vmax = np.max(random_samples)
49-
for i in range(1, 5):
50-
ax = fig.add_subplot(gs[0, i])
51-
rand_sample = recalled_memories_sorted[tuple(pos)][random_inds[i]]
52-
im = ax.imshow(np.expand_dims(rand_sample.T, axis=1).squeeze(), vmin=vmin, vmax=vmax)
53-
ax.set_title(f"S{i}")
54-
ax.set(xticklabels=[])
55-
ax.set(yticklabels=[])
56-
plt.show()
35+
# if plot:
36+
# positions = np.array([key for key in recalled_memories_sorted.keys()])
37+
# rand_inds = np.random.choice(range(len(positions)), 5)
38+
# for pos in positions[rand_inds]:
39+
# fig = plt.figure(figsize=(10, 3))
40+
# gs = fig.add_gridspec(1, 6)
41+
# ax1 = fig.add_subplot(gs[0, 0])
42+
# ax1.set_title(f"Position: {pos}")
43+
# avg_mem = np.mean(recalled_memories_sorted[tuple(pos)], axis=0)
44+
# ax1.imshow(avg_mem.T)
45+
# random_inds = np.random.choice(range(len(recalled_memories_sorted[tuple(pos)])), 5)
46+
# random_samples = np.array(recalled_memories_sorted[tuple(pos)])[random_inds]
47+
# vmin = np.min(random_samples)
48+
# vmax = np.max(random_samples)
49+
# for i in range(1, 5):
50+
# ax = fig.add_subplot(gs[0, i])
51+
# rand_sample = recalled_memories_sorted[tuple(pos)][random_inds[i]]
52+
# im = ax.imshow(np.expand_dims(rand_sample.T, axis=1).squeeze(), vmin=vmin, vmax=vmax)
53+
# ax.set_title(f"S{i}")
54+
# ax.set(xticklabels=[])
55+
# ax.set(yticklabels=[])
56+
# plt.show()

scripts/Chris/DQN/recalled_mem_preprocessing.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44

55

6-
def recalled_mem_preprocessing(window_freq, window_size, plot):
6+
def recalled_mem_preprocessing(width, height, plot):
77
print('Preprocessing recalled memories...')
88

99
## Load recalled memory spike-trains ##
@@ -46,21 +46,9 @@ def recalled_mem_preprocessing(window_freq, window_size, plot):
4646
pkl.dump(new_samples_sorted, f)
4747

4848
if plot:
49-
# positions = np.array([key for key in new_samples_sorted.keys()])
50-
# fig = plt.figure(figsize=(10, 10))
51-
# gs = fig.add_gridspec(nrows=5, ncols=5)
52-
# for i, pos in enumerate(positions):
53-
# ax = fig.add_subplot(gs[int(pos[0]), int(pos[1])])
54-
# avg_mem = np.mean(new_samples_sorted[tuple(pos)], axis=0)
55-
# ax.set_title(f"Conf-Mat: {pos[0] * 5 + pos[1]}")
56-
# im = ax.imshow(np.expand_dims(avg_mem, axis=0))
57-
# ax.set_aspect('auto')
58-
# plt.tight_layout()
59-
# plt.show()
60-
6149
positions = np.array([key for key in new_samples_sorted.keys()])
62-
fig = plt.figure(figsize=(10, 10))
63-
gs = fig.add_gridspec(nrows=5, ncols=5)
50+
fig = plt.figure(figsize=(50, 50))
51+
gs = fig.add_gridspec(nrows=width, ncols=height)
6452
for i, pos in enumerate(positions):
6553
ax = fig.add_subplot(gs[int(pos[0]), int(pos[1])])
6654
avg_mem = np.mean(recalled_memories_sorted[tuple(pos)], axis=0)

scripts/Chris/DQN/train_DQN.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -135,26 +135,32 @@ def run_episode(env, policy_net, device, max_steps, eps=0):
135135

136136

137137

138-
def train_DQN(input_size, lr, batch_size, eps_start, eps_end, eps_decay, tau, gamma, max_steps_per_ep, max_total_steps, max_eps):
138+
def train_DQN(input_size, env_width, env_height, lr, batch_size, eps_start,
139+
eps_end, decay_intensity, tau, gamma, max_steps_per_ep, max_total_steps, max_eps, plot):
139140
device = 'cpu'
140141
n_actions = 4
141142
policy_net = DQN(input_size, n_actions).to(device)
142143
target_net = DQN(input_size, n_actions).to(device)
143144
target_net.load_state_dict(policy_net.state_dict())
144145
optimizer = optim.AdamW(policy_net.parameters(), lr=lr, amsgrad=True)
145-
memory = ReplayMemory(10000)
146-
env = Grid_Cell_Maze_Environment(width=5, height=5)
146+
memory = ReplayMemory(1000)
147+
env = Grid_Cell_Maze_Environment(width=env_width, height=env_height)
148+
149+
## Pre-training recording ##
150+
if plot:
151+
run_episode(env, policy_net, device, 100, eps=0.9)
152+
env.animate_history("pre_training.gif")
147153

148154
episode_durations = []
149155
episodes = 0
150156
total_steps = 0
151157
print(env.maze)
152-
while total_steps < max_total_steps and episodes < max_eps:
158+
while total_steps < max_total_steps: # and episodes < max_eps:
153159
# Initialize the environment and get its state
154160
state, info = env.reset()
155161
state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
156162
for t in count():
157-
eps = eps_end + (eps_start - eps_end) * math.exp(-1. * total_steps / eps_decay)
163+
eps = eps_end + (eps_start - eps_end) * math.exp(-decay_intensity * total_steps / (max_total_steps))
158164
action = select_action(state, t, eps, policy_net, env)
159165
observation, reward, terminated, _ = env.step(action.item())
160166
reward = torch.tensor([reward], device=device)
@@ -188,5 +194,15 @@ def train_DQN(input_size, lr, batch_size, eps_start, eps_end, eps_decay, tau, ga
188194
print(f"Episode {episodes} lasted {t + 1} steps, eps = {round(eps, 2)} total steps = {total_steps}")
189195
episodes += 1
190196

191-
plt.plot(episode_durations)
192-
plt.show()
197+
## Post-training recording ##
198+
if plot:
199+
env.reset()
200+
run_episode(env, policy_net, device, 100, eps=0) # eps = 0 -> no exploration
201+
env.animate_history("post_training.gif")
202+
plt.clf()
203+
204+
plt.plot(episode_durations)
205+
plt.title("Episode durations")
206+
plt.ylabel("Duration")
207+
plt.xlabel("Episode")
208+
plt.show()

0 commit comments

Comments (0)