From 74641e0027b140fb9acf5b8d1898483208ca7ee9 Mon Sep 17 00:00:00 2001
From: Andrew Mao <andrewyxmao@gmail.com>
Date: Tue, 10 Feb 2026 13:10:36 -0800
Subject: [PATCH 1/4] init notes

---
 src/software/ai/hl/stp/play/play.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/src/software/ai/hl/stp/play/play.h b/src/software/ai/hl/stp/play/play.h
index b66f240b05..903bf5b9a8 100644
--- a/src/software/ai/hl/stp/play/play.h
+++ b/src/software/ai/hl/stp/play/play.h
@@ -191,4 +191,18 @@ class Play
     uint64_t sequence_number = 0;
 
     RobotNavigationObstacleFactory obstacle_factory;
+
+    /**
+     * 1) matrix of each tactic
+     * 2) Robot w/ vector of prev tactics + costs (prune if out of threshold)
+     * 3)
+     *
+     * Info:
+     * - robot_cost_for_tactic for each robot
+     *
+     * Penalty Algorithms
+     * - give the same tactic a constant
+     * - Exponential punishment for matching tactics
+     * - Convolutions as a function of costs over time? For the given tactic? robot?
+     */
 };

From d02644e9c604a4d3f73f37f634631a26a621b975 Mon Sep 17 00:00:00 2001
From: Andrew Mao <andrewyxmao@gmail.com>
Date: Tue, 10 Feb 2026 21:55:31 -0800
Subject: [PATCH 2/4] exp func added

---
 src/software/ai/hl/stp/play/play.cpp | 31 ++++++++++++++++++++++++++++
 src/software/ai/hl/stp/play/play.h   | 12 +++++++++++
 2 files changed, 43 insertions(+)

diff --git a/src/software/ai/hl/stp/play/play.cpp b/src/software/ai/hl/stp/play/play.cpp
index bdeccf8b35..a0cb613102 100644
--- a/src/software/ai/hl/stp/play/play.cpp
+++ b/src/software/ai/hl/stp/play/play.cpp
@@ -298,6 +298,10 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
     // "jobs" (the Tactics).
     Matrix<double> matrix(num_rows, num_cols);
 
+    std::map<const RobotId, PreviousAssignment> prev_assignments;
+    std::map<const RobotId, PreviousAssignment> curr_assignments;
+    if (!prev_assignments.empty()) prev_assignments = assignments_cache.back();
+
     // Initialize the matrix with the cost of assigning each Robot to each Tactic
     for (size_t row = 0; row < num_rows; row++)
     {
@@ -305,12 +309,33 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
         {
             Robot robot                    = robots_to_assign.at(row);
             std::shared_ptr<Tactic> tactic = tactic_vector.at(col);
+            PreviousAssignment current_assignment;
+            current_assignment.previous_tactic = tactic;
+            current_assignment.penality_cost = static_cast<uint32_t>(8);
+
             auto primitives                = primitive_sets.at(col);
             CHECK(primitives.contains(robot.id()))
                 << "Couldn't find a primitive for robot id " << robot.id();
             double robot_cost_for_tactic =
                 primitives.at(robot.id())->getEstimatedPrimitiveCost();
 
+
+            if (!prev_assignments.empty() && prev_assignments.contains(robot.id()) && tactic)
+            {
+                const PreviousAssignment& assignment = prev_assignments.at(robot.id());
+                if (typeid(*assignment.previous_tactic) == typeid(*tactic))
+                    // TODO: Decrement/increment the penalty
+                    current_assignment.penality_cost = std::max(current_assignment.penality_cost, assignment.penality_cost / 2);
+                else
+                {
+                    // TODO: Apply the penalty
+                    robot_cost_for_tactic += assignment.penality_cost;
+                    current_assignment.penality_cost = assignment.penality_cost * 2;
+                }
+            }
+
+            curr_assignments[robot.id()] = current_assignment;
+
             std::set<RobotCapability> required_capabilities =
                 tactic->robotCapabilityRequirements();
             std::set<RobotCapability> robot_capabilities =
@@ -335,6 +360,12 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
         }
     }
 
+    if (assignments_cache.size() > ASSIGNMENTS_CACHE_MAX_SIZE)
+    {
+        assignments_cache.pop_front();
+    }
+
+    assignments_cache.push_back(curr_assignments);
     // Apply the Munkres/Hungarian algorithm to the matrix.
     Munkres<double> m;
     m.solve(matrix);
diff --git a/src/software/ai/hl/stp/play/play.h b/src/software/ai/hl/stp/play/play.h
index 903bf5b9a8..ed1f0cba0e 100644
--- a/src/software/ai/hl/stp/play/play.h
+++ b/src/software/ai/hl/stp/play/play.h
@@ -192,6 +192,17 @@ class Play
 
     RobotNavigationObstacleFactory obstacle_factory;
 
+    static constexpr uint32_t ASSIGNMENTS_CACHE_MAX_SIZE = 5;
+
+
+    struct PreviousAssignment
+    {
+        uint32_t penality_cost;
+        std::shared_ptr<const Tactic> previous_tactic;
+    };
+
+    std::deque<std::map<const RobotId, PreviousAssignment>> assignments_cache;
+
     /**
      * 1) matrix of each tactic
      * 2) Robot w/ vector of prev tactics + costs (prune if out of threshold)
@@ -205,4 +216,5 @@ class Play
      * - Exponential punishment for matching tactics
      * - Convolutions as a function of costs over time? For the given tactic? robot?
      */
+
 };

From 56ce2374fb344d126da128a6500eea97373764e7 Mon Sep 17 00:00:00 2001
From: Andrew Mao <andrewyxmao@gmail.com>
Date: Thu, 26 Feb 2026 16:36:11 -0800
Subject: [PATCH 3/4] crash fix for unassigned tactic

---
 src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp b/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp
index 216f5293f5..a4a53c875b 100644
--- a/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp
+++ b/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp
@@ -177,6 +177,15 @@ void ReceiverFSM::adjustReceive(const Update& event)
             TbotsProto::DribblerMode::MAX_FORCE, TbotsProto::BallCollisionType::ALLOW,
             AutoChipOrKick{AutoChipOrKickMode::OFF, 0}));
     }
+    else
+    {
+        event.common.set_primitive(std::make_unique<MovePrimitive>(
+            event.common.robot, robot_pos, event.common.robot.orientation(),
+            TbotsProto::MaxAllowedSpeedMode::PHYSICAL_LIMIT,
+            TbotsProto::ObstacleAvoidanceMode::AGGRESSIVE,
+            TbotsProto::DribblerMode::MAX_FORCE, TbotsProto::BallCollisionType::ALLOW,
+            AutoChipOrKick{AutoChipOrKickMode::OFF, 0}));
+    }
 }
 
 bool ReceiverFSM::passStarted(const Update& event)

From e4dad25f9c07daae34a0214a2bfc1ef993314f17 Mon Sep 17 00:00:00 2001
From: Andrew Mao <andrewyxmao@gmail.com>
Date: Tue, 10 Mar 2026 12:12:46 -0700
Subject: [PATCH 4/4] working

---
 src/software/ai/hl/stp/play/play.cpp   | 61 ++++++++++++++------------
 src/software/ai/hl/stp/play/play.h     | 31 +++----------
 src/software/thunderscope/constants.py |  2 +-
 3 files changed, 41 insertions(+), 53 deletions(-)

diff --git a/src/software/ai/hl/stp/play/play.cpp b/src/software/ai/hl/stp/play/play.cpp
index a0cb613102..2585ad391b 100644
--- a/src/software/ai/hl/stp/play/play.cpp
+++ b/src/software/ai/hl/stp/play/play.cpp
@@ -25,6 +25,13 @@ Play::Play(std::shared_ptr<const TbotsProto::AiConfig> ai_config_ptr,
     {
         halt_tactics.push_back(std::make_shared<HaltTactic>(ai_config_ptr));
     }
+    previous_robot_tactics.reserve(MAX_ROBOT_IDS_PER_SIDE);
+
+    for (auto& previous_tactic : previous_robot_tactics | std::views::values)
+    {
+        previous_tactic.resize(ASSIGNMENTS_CACHE_MAX_SIZE);
+    }
+    previous_robot_tactics_count.reserve(MAX_ROBOT_IDS_PER_SIDE);
 }
 
 PriorityTacticVector Play::getTactics(const WorldPtr &world_ptr)
@@ -298,10 +305,6 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
     // "jobs" (the Tactics).
     Matrix<double> matrix(num_rows, num_cols);
 
-    std::map<const RobotId, PreviousAssignment> prev_assignments;
-    std::map<const RobotId, PreviousAssignment> curr_assignments;
-    if (!prev_assignments.empty()) prev_assignments = assignments_cache.back();
-
     // Initialize the matrix with the cost of assigning each Robot to each Tactic
     for (size_t row = 0; row < num_rows; row++)
     {
@@ -309,9 +312,6 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
         {
             Robot robot                    = robots_to_assign.at(row);
             std::shared_ptr<Tactic> tactic = tactic_vector.at(col);
-            PreviousAssignment current_assignment;
-            current_assignment.previous_tactic = tactic;
-            current_assignment.penality_cost = static_cast<uint32_t>(8);
 
             auto primitives                = primitive_sets.at(col);
             CHECK(primitives.contains(robot.id()))
@@ -320,21 +320,19 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
                 primitives.at(robot.id())->getEstimatedPrimitiveCost();
 
 
-            if (!prev_assignments.empty() && prev_assignments.contains(robot.id()) && tactic)
-            {
-                const PreviousAssignment& assignment = prev_assignments.at(robot.id());
-                if (typeid(*assignment.previous_tactic) == typeid(*tactic))
-                    // TODO: Decrement/increment the penalty
-                    current_assignment.penality_cost = std::max(current_assignment.penality_cost, assignment.penality_cost / 2);
-                else
-                {
-                    // TODO: Apply the penalty
-                    robot_cost_for_tactic += assignment.penality_cost;
-                    current_assignment.penality_cost = assignment.penality_cost * 2;
-                }
-            }
-
-            curr_assignments[robot.id()] = current_assignment;
+            /**
+             * We want to discourage changing away from a tactic over and over again it is chosen many times
+             * - penalty should range from 0 to 0.5
+             * - store last k assignment matrices
+             * We can add this flatly to the score
+             * - score + 0.5 * (# of matching_current)/k
+             * We can also increase based on a fraction of the current score e.g.
+             * - score( 1 + 0.15 * (# of matching_current)/k
+             */
+            int previous_tactic_count = previous_robot_tactics_count[robot.id()][std::type_index(typeid(*tactic))];
+            double similarity =  static_cast<double>(previous_tactic_count) / ASSIGNMENTS_CACHE_MAX_SIZE;
+            // LOG(DEBUG) << "COUNT:"<< previous_tactic_count <<" SIM:"<< similarity;
+            robot_cost_for_tactic = std::max(0.0, robot_cost_for_tactic - 0.5 * similarity);
 
             std::set<RobotCapability> required_capabilities =
                 tactic->robotCapabilityRequirements();
@@ -360,14 +358,9 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
         }
     }
 
-    if (assignments_cache.size() > ASSIGNMENTS_CACHE_MAX_SIZE)
-    {
-        assignments_cache.pop_front();
-    }
-
-    assignments_cache.push_back(curr_assignments);
     // Apply the Munkres/Hungarian algorithm to the matrix.
     Munkres<double> m;
+    // LOG(DEBUG) << matrix;
     m.solve(matrix);
 
     // The Munkres matrix gets solved such that there will be exactly one 0 in every
@@ -391,6 +384,18 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector,
                                                            robot_id);
                 tactic_vector.at(col)->setLastExecutionRobot(robot_id);
 
+                auto tactic = tactic_vector.at(col);
+                if (previous_robot_tactics[robot_id].size() >= ASSIGNMENTS_CACHE_MAX_SIZE)
+                {
+                    std::type_index tactic_type = *previous_robot_tactics[robot_id].front();
+                    previous_robot_tactics_count[robot_id][tactic_type]--;
+                    previous_robot_tactics[robot_id].pop_front();
+
+                }
+                std::type_index type = std::type_index(typeid(*tactic));
+                previous_robot_tactics[robot_id].push_back(std::make_shared<std::type_index>(type));
+                previous_robot_tactics_count[robot_id][type]++;
+
                 auto primitives = primitive_sets.at(col);
                 CHECK(primitives.contains(robot_id))
                     << "Couldn't find a primitive for robot id " << robot_id;
diff --git a/src/software/ai/hl/stp/play/play.h b/src/software/ai/hl/stp/play/play.h
index ed1f0cba0e..f829e3f0dd 100644
--- a/src/software/ai/hl/stp/play/play.h
+++ b/src/software/ai/hl/stp/play/play.h
@@ -2,6 +2,8 @@
 
 #include <boost/coroutine2/all.hpp>
 #include <vector>
+#include <typeindex>
+#include <typeinfo>
 
 #include "proto/parameters.pb.h"
 #include "software/ai/hl/stp/play/play_fsm.hpp"
@@ -192,29 +194,10 @@ class Play
 
     RobotNavigationObstacleFactory obstacle_factory;
 
-    static constexpr uint32_t ASSIGNMENTS_CACHE_MAX_SIZE = 5;
-
-
-    struct PreviousAssignment
-    {
-        uint32_t penality_cost;
-        std::shared_ptr<const Tactic> previous_tactic;
-    };
-
-    std::deque<std::map<const RobotId, PreviousAssignment>> assignments_cache;
-
-    /**
-     * 1) matrix of each tactic
-     * 2) Robot w/ vector of prev tactics + costs (prune if out of threshold)
-     * 3)
-     *
-     * Info:
-     * - robot_cost_for_tactic for each robot
-     *
-     * Penalty Algorithms
-     * - give the same tactic a constant
-     * - Exponential punishment for matching tactics
-     * - Convolutions as a function of costs over time? For the given tactic? robot?
-     */
+    static constexpr uint32_t ASSIGNMENTS_CACHE_MAX_SIZE = 20;
 
+    using TacticTypeDeque = std::deque<std::shared_ptr<std::type_index>>;
+    using RobotPreviousTactics = std::unordered_map<std::type_index, int>;
+    std::unordered_map<RobotId,TacticTypeDeque> previous_robot_tactics;
+    std::unordered_map<RobotId, RobotPreviousTactics> previous_robot_tactics_count;
 };
diff --git a/src/software/thunderscope/constants.py b/src/software/thunderscope/constants.py
index 217d87a302..35254fa77b 100644
--- a/src/software/thunderscope/constants.py
+++ b/src/software/thunderscope/constants.py
@@ -75,7 +75,7 @@ class EstopMode(IntEnum):
 )
 
 # How long AI vs AI runs before ending in CI
-CI_DURATION_S = 180
+CI_DURATION_S = 600
 
 MULTI_PLANE_POINTS = 3