From 74641e0027b140fb9acf5b8d1898483208ca7ee9 Mon Sep 17 00:00:00 2001 From: Andrew Mao Date: Tue, 10 Feb 2026 13:10:36 -0800 Subject: [PATCH 1/4] init notes --- src/software/ai/hl/stp/play/play.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/software/ai/hl/stp/play/play.h b/src/software/ai/hl/stp/play/play.h index b66f240b05..903bf5b9a8 100644 --- a/src/software/ai/hl/stp/play/play.h +++ b/src/software/ai/hl/stp/play/play.h @@ -191,4 +191,18 @@ class Play uint64_t sequence_number = 0; RobotNavigationObstacleFactory obstacle_factory; + + /** + * 1) matrix of each tactic + * 2) Robot w/ vector of prev tactics + costs (prune if out of threshold) + * 3) + * + * Info: + * - robot_cost_for_tactic for each robot + * + * Penalty Algorithms + * - give the same tactic a constant + * - Exponential punishment for matching tactics + * - Convolutions as a function of costs over time? For the given tactic? robot? + */ }; From d02644e9c604a4d3f73f37f634631a26a621b975 Mon Sep 17 00:00:00 2001 From: Andrew Mao Date: Tue, 10 Feb 2026 21:55:31 -0800 Subject: [PATCH 2/4] exp func added --- src/software/ai/hl/stp/play/play.cpp | 31 ++++++++++++++++++++++++++++ src/software/ai/hl/stp/play/play.h | 12 +++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/software/ai/hl/stp/play/play.cpp b/src/software/ai/hl/stp/play/play.cpp index bdeccf8b35..a0cb613102 100644 --- a/src/software/ai/hl/stp/play/play.cpp +++ b/src/software/ai/hl/stp/play/play.cpp @@ -298,6 +298,10 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, // "jobs" (the Tactics). Matrix matrix(num_rows, num_cols); + std::map prev_assignments; + std::map curr_assignments; + if (!prev_assignments.empty()) prev_assignments = assignments_cache.back(); + // Initialize the matrix with the cost of assigning each Robot to each Tactic for (size_t row = 0; row < num_rows; row++) { @@ -305,12 +309,33 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, { Robot robot = robots_to_assign.at(row); std::shared_ptr tactic = tactic_vector.at(col); + PreviousAssignment current_assignment; + current_assignment.previous_tactic = tactic; + current_assignment.penality_cost = static_cast(8); + auto primitives = primitive_sets.at(col); CHECK(primitives.contains(robot.id())) << "Couldn't find a primitive for robot id " << robot.id(); double robot_cost_for_tactic = primitives.at(robot.id())->getEstimatedPrimitiveCost(); + + if (!prev_assignments.empty() && prev_assignments.contains(robot.id()) && tactic) + { + const PreviousAssignment& assignment = prev_assignments.at(robot.id()); + if (typeid(*assignment.previous_tactic) == typeid(*tactic)) + // TODO: Decrement/increment the penalty + current_assignment.penality_cost = std::max(current_assignment.penality_cost, assignment.penality_cost / 2); + else + { + // TODO: Apply the penalty + robot_cost_for_tactic += assignment.penality_cost; + current_assignment.penality_cost = assignment.penality_cost * 2; + } + } + + curr_assignments[robot.id()] = current_assignment; + std::set required_capabilities = tactic->robotCapabilityRequirements(); std::set robot_capabilities = @@ -335,6 +360,12 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, } } + if (assignments_cache.size() > ASSIGNMENTS_CACHE_MAX_SIZE) + { + assignments_cache.pop_front(); + } + + assignments_cache.push_back(curr_assignments); // Apply the Munkres/Hungarian algorithm to the matrix. Munkres m; m.solve(matrix); diff --git a/src/software/ai/hl/stp/play/play.h b/src/software/ai/hl/stp/play/play.h index 903bf5b9a8..ed1f0cba0e 100644 --- a/src/software/ai/hl/stp/play/play.h +++ b/src/software/ai/hl/stp/play/play.h @@ -192,6 +192,17 @@ class Play RobotNavigationObstacleFactory obstacle_factory; + static constexpr uint32_t ASSIGNMENTS_CACHE_MAX_SIZE = 5; + + + struct PreviousAssignment + { + uint32_t penality_cost; + std::shared_ptr previous_tactic; + }; + + std::deque> assignments_cache; + /** * 1) matrix of each tactic * 2) Robot w/ vector of prev tactics + costs (prune if out of threshold) @@ -205,4 +216,5 @@ class Play * - Exponential punishment for matching tactics * - Convolutions as a function of costs over time? For the given tactic? robot? */ + }; From 56ce2374fb344d126da128a6500eea97373764e7 Mon Sep 17 00:00:00 2001 From: Andrew Mao Date: Thu, 26 Feb 2026 16:36:11 -0800 Subject: [PATCH 3/4] crash fix for unassigned tactic --- src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp b/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp index 216f5293f5..a4a53c875b 100644 --- a/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp +++ b/src/software/ai/hl/stp/tactic/receiver/receiver_fsm.cpp @@ -177,6 +177,15 @@ void ReceiverFSM::adjustReceive(const Update& event) TbotsProto::DribblerMode::MAX_FORCE, TbotsProto::BallCollisionType::ALLOW, AutoChipOrKick{AutoChipOrKickMode::OFF, 0})); } + else + { + event.common.set_primitive(std::make_unique( + event.common.robot, robot_pos, event.common.robot.orientation(), + TbotsProto::MaxAllowedSpeedMode::PHYSICAL_LIMIT, + TbotsProto::ObstacleAvoidanceMode::AGGRESSIVE, + TbotsProto::DribblerMode::MAX_FORCE, TbotsProto::BallCollisionType::ALLOW, + AutoChipOrKick{AutoChipOrKickMode::OFF, 0})); + } } bool ReceiverFSM::passStarted(const Update& event) From e4dad25f9c07daae34a0214a2bfc1ef993314f17 Mon Sep 17 00:00:00 2001 From: Andrew Mao Date: Tue, 10 Mar 2026 12:12:46 -0700 Subject: [PATCH 4/4] working --- src/software/ai/hl/stp/play/play.cpp | 61 ++++++++++++++------------ src/software/ai/hl/stp/play/play.h | 31 +++---------- src/software/thunderscope/constants.py | 2 +- 3 files changed, 41 insertions(+), 53 deletions(-) diff --git a/src/software/ai/hl/stp/play/play.cpp b/src/software/ai/hl/stp/play/play.cpp index a0cb613102..2585ad391b 100644 --- a/src/software/ai/hl/stp/play/play.cpp +++ b/src/software/ai/hl/stp/play/play.cpp @@ -25,6 +25,13 @@ Play::Play(std::shared_ptr ai_config_ptr, { halt_tactics.push_back(std::make_shared(ai_config_ptr)); } + previous_robot_tactics.reserve(MAX_ROBOT_IDS_PER_SIDE); + + for (auto& previous_tactic : previous_robot_tactics | std::views::values) + { + previous_tactic.resize(ASSIGNMENTS_CACHE_MAX_SIZE); + } + previous_robot_tactics_count.reserve(MAX_ROBOT_IDS_PER_SIDE); } PriorityTacticVector Play::getTactics(const WorldPtr &world_ptr) @@ -298,10 +305,6 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, // "jobs" (the Tactics). Matrix matrix(num_rows, num_cols); - std::map prev_assignments; - std::map curr_assignments; - if (!prev_assignments.empty()) prev_assignments = assignments_cache.back(); - // Initialize the matrix with the cost of assigning each Robot to each Tactic for (size_t row = 0; row < num_rows; row++) { @@ -309,9 +312,6 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, { Robot robot = robots_to_assign.at(row); std::shared_ptr tactic = tactic_vector.at(col); - PreviousAssignment current_assignment; - current_assignment.previous_tactic = tactic; - current_assignment.penality_cost = static_cast(8); auto primitives = primitive_sets.at(col); CHECK(primitives.contains(robot.id())) @@ -320,21 +320,19 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, primitives.at(robot.id())->getEstimatedPrimitiveCost(); - if (!prev_assignments.empty() && prev_assignments.contains(robot.id()) && tactic) - { - const PreviousAssignment& assignment = prev_assignments.at(robot.id()); - if (typeid(*assignment.previous_tactic) == typeid(*tactic)) - // TODO: Decrement/increment the penalty - current_assignment.penality_cost = std::max(current_assignment.penality_cost, assignment.penality_cost / 2); - else - { - // TODO: Apply the penalty - robot_cost_for_tactic += assignment.penality_cost; - current_assignment.penality_cost = assignment.penality_cost * 2; - } - } - - curr_assignments[robot.id()] = current_assignment; + /** + * We want to discourage changing away from a tactic over and over again it is chosen many times + * - penalty should range from 0 to 0.5 + * - store last k assignment matrices + * We can add this flatly to the score + * - score + 0.5 * (# of matching_current)/k + * We can also increase based on a fraction of the current score e.g. + * - score( 1 + 0.15 * (# of matching_current)/k + */ + int previous_tactic_count = previous_robot_tactics_count[robot.id()][std::type_index(typeid(*tactic))]; + double similarity = static_cast(previous_tactic_count) / ASSIGNMENTS_CACHE_MAX_SIZE; + // LOG(DEBUG) << "COUNT:"<< previous_tactic_count <<" SIM:"<< similarity; + robot_cost_for_tactic = std::max(0.0, robot_cost_for_tactic - 0.5 * similarity); std::set required_capabilities = tactic->robotCapabilityRequirements(); @@ -360,14 +358,9 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, } } - if (assignments_cache.size() > ASSIGNMENTS_CACHE_MAX_SIZE) - { - assignments_cache.pop_front(); - } - - assignments_cache.push_back(curr_assignments); // Apply the Munkres/Hungarian algorithm to the matrix. Munkres m; + // LOG(DEBUG) << matrix; m.solve(matrix); // The Munkres matrix gets solved such that there will be exactly one 0 in every @@ -391,6 +384,18 @@ Play::assignTactics(const WorldPtr &world_ptr, TacticVector tactic_vector, robot_id); tactic_vector.at(col)->setLastExecutionRobot(robot_id); + auto tactic = tactic_vector.at(col); + if (previous_robot_tactics[robot_id].size() >= ASSIGNMENTS_CACHE_MAX_SIZE) + { + std::type_index tactic_type = *previous_robot_tactics[robot_id].front(); + previous_robot_tactics_count[robot_id][tactic_type]--; + previous_robot_tactics[robot_id].pop_front(); + + } + std::type_index type = std::type_index(typeid(*tactic)); + previous_robot_tactics[robot_id].push_back(std::make_shared(type)); + previous_robot_tactics_count[robot_id][type]++; + auto primitives = primitive_sets.at(col); CHECK(primitives.contains(robot_id)) << "Couldn't find a primitive for robot id " << robot_id; diff --git a/src/software/ai/hl/stp/play/play.h b/src/software/ai/hl/stp/play/play.h index ed1f0cba0e..f829e3f0dd 100644 --- a/src/software/ai/hl/stp/play/play.h +++ b/src/software/ai/hl/stp/play/play.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include "proto/parameters.pb.h" #include "software/ai/hl/stp/play/play_fsm.hpp" @@ -192,29 +194,10 @@ class Play RobotNavigationObstacleFactory obstacle_factory; - static constexpr uint32_t ASSIGNMENTS_CACHE_MAX_SIZE = 5; - - - struct PreviousAssignment - { - uint32_t penality_cost; - std::shared_ptr previous_tactic; - }; - - std::deque> assignments_cache; - - /** - * 1) matrix of each tactic - * 2) Robot w/ vector of prev tactics + costs (prune if out of threshold) - * 3) - * - * Info: - * - robot_cost_for_tactic for each robot - * - * Penalty Algorithms - * - give the same tactic a constant - * - Exponential punishment for matching tactics - * - Convolutions as a function of costs over time? For the given tactic? robot? - */ + static constexpr uint32_t ASSIGNMENTS_CACHE_MAX_SIZE = 20; + using TacticTypeDeque = std::deque>; + using RobotPreviousTactics = std::unordered_map; + std::unordered_map previous_robot_tactics; + std::unordered_map previous_robot_tactics_count; }; diff --git a/src/software/thunderscope/constants.py b/src/software/thunderscope/constants.py index 217d87a302..35254fa77b 100644 --- a/src/software/thunderscope/constants.py +++ b/src/software/thunderscope/constants.py @@ -75,7 +75,7 @@ class EstopMode(IntEnum): ) # How long AI vs AI runs before ending in CI -CI_DURATION_S = 180 +CI_DURATION_S = 600 MULTI_PLANE_POINTS = 3