Renamed agent-level "MLP" references to "DQN" in code comments and messages.

kiran-sairam · kiran-sairam · commit 8487bf199238 · 2026-05-07T19:42:45.000-04:00
- run_experiments.py, record_gameplay.py, record_all_gifs.py: print/warning
  and error messages, docstrings, and CLI usage examples
- save_load.py: module docstring + save/load print messages
- train.py: docstring updated and corrected (it previously claimed that all
  agents use semi-gradient SARSA; the DQN agent actually uses off-policy
  Q-learning with replay)
- test_double_dqn.py: print label
diff --git a/record_all_gifs.py b/record_all_gifs.py
@@ -36,7 +36,7 @@
     HAS_TORCH = True
 except ImportError:
     HAS_TORCH = False
-    print("WARNING: PyTorch not installed. MLP GIFs will be skipped.")
+    print("WARNING: PyTorch not installed. DQN GIFs will be skipped.")
 
 GRID_SIZE = 20
 CELL_SIZE = 30
diff --git a/record_gameplay.py b/record_gameplay.py
@@ -98,9 +98,9 @@ def make_agent(algo, rep_name, weights_dir=WEIGHTS_DIR, name=None):
     """
     Create a fresh agent for the given algo/rep.
 
-    For MLP agents, reads the architecture (hidden_dims) directly from the
-    checkpoint file so v1 (hidden_dim=128) and v2 (hidden_dims=(256,128))
-    weights both load correctly without any flags.
+    For the DQN agent, reads the architecture (hidden_dims) directly from
+    the checkpoint file so any saved architecture loads correctly without
+    any flags.
     """
     rep = REPRESENTATIONS[rep_name]()
 
@@ -121,7 +121,7 @@ def make_agent(algo, rep_name, weights_dir=WEIGHTS_DIR, name=None):
 
     elif algo == "mlp":
         if not HAS_TORCH:
-            raise ImportError("PyTorch required for MLP")
+            raise ImportError("PyTorch required for DQN")
         mlp_name = name or weight_name(algo, rep_name)
         pt_path = os.path.join(weights_dir, f"{mlp_name}.pt")
         if os.path.exists(pt_path):
@@ -392,7 +392,7 @@ def main():
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 examples:
-  # Watch MLP on compact representation
+  # Watch DQN on compact representation
   python record_gameplay.py --watch mlp compact
 
   # Watch a specific seed
diff --git a/run_experiments.py b/run_experiments.py
@@ -6,7 +6,7 @@
     3. max_steps_factor=3 (1,200 steps max)
     4. Larger tile hash table (262,144)
     5. Epsilon decay over 80% of training
-    6. Default 20,000 episodes (MLP needs more steps to converge with replay)
+    6. Default 20,000 episodes (DQN needs more steps to converge with replay)
 
 Usage:
     python run_experiments.py                          # all configs
@@ -42,7 +42,7 @@
     HAS_TORCH = True
 except ImportError:
     HAS_TORCH = False
-    print("WARNING: PyTorch not installed. MLP experiments will be skipped.")
+    print("WARNING: PyTorch not installed. DQN experiments will be skipped.")
 
 
 # ============================================================
@@ -189,7 +189,7 @@ def make_agent(algo: str, rep_instance, config: ExperimentConfig, seed: int, env
         return agent
     elif algo == "mlp":
         if not HAS_TORCH:
-            raise ImportError("PyTorch required for MLP agent")
+            raise ImportError("PyTorch required for DQN agent")
         p = config.algo_params
         return DoubleDQNAgent(
             representation=rep_instance,
diff --git a/snake_rl/agents/train.py b/snake_rl/agents/train.py
@@ -8,9 +8,11 @@
     - Logging (per-episode metrics via RunLogger)
     - Progress reporting
 
-The same loop is used for linear FA, tile coding, and MLP agents,
-since they all use the semi-gradient SARSA update — only the
-function approximation differs.
+The same loop is used for linear FA, tile coding, and DQN agents.
+Linear FA and tile coding agents do a semi-gradient SARSA update on
+each step; the DQN agent stores the transition and runs a mini-batch
+Double-Q update internally. Only the agent's `.update()` method differs
+— the surrounding loop interface is identical.
 """
 
 import time
diff --git a/snake_rl/utils/save_load.py b/snake_rl/utils/save_load.py
@@ -1,7 +1,7 @@
 """
 Save and load trained agent weights.
 
-Supports all three agent types: Linear FA, Tile Coding, MLP.
+Supports all three agent types: Linear FA, Tile Coding, DQN.
 """
 
 import os
@@ -45,7 +45,7 @@ def save_agent(agent, name: str, directory: str = WEIGHTS_DIR):
             }, filepath)
             print(f"Saved {agent_type} weights to {filepath}")
         except ImportError:
-            print("PyTorch required to save MLP weights")
+            print("PyTorch required to save DQN weights")
 
     else:
         raise ValueError(f"Unknown agent type: {agent_type}")
@@ -79,7 +79,7 @@ def load_agent_weights(agent, name: str, directory: str = WEIGHTS_DIR):
             agent.q_net.load_state_dict(checkpoint["q_net"])
             print(f"Loaded {agent_type} weights from {filepath}")
         except ImportError:
-            print("PyTorch required to load MLP weights")
+            print("PyTorch required to load DQN weights")
 
     else:
         raise ValueError(f"Unknown agent type: {agent_type}")
diff --git a/tests/test_double_dqn.py b/tests/test_double_dqn.py
@@ -249,7 +249,7 @@ def test_training_improves(self):
         early = np.mean(logger.scores[:300])
         late = np.mean(logger.scores[-300:])
         assert late > early, f"Scores should improve: early={early:.2f}, late={late:.2f}"
-        print(f"    [MLP] Early: {early:.2f} → Late: {late:.2f}")
+        print(f"    [DQN] Early: {early:.2f} → Late: {late:.2f}")
 
 
 # ============================================================