ImperialBower
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 12 additions & 0 deletions
diff --git a/ROADMAP.md b/ROADMAP.md
Lines changed: 1 addition & 0 deletions
diff --git a/data/exploit_configs/.gitkeep b/data/exploit_configs/.gitkeep
diff --git a/data/exploit_configs/tag_trained.yaml b/data/exploit_configs/tag_trained.yaml
Lines changed: 16 additions & 0 deletions
diff --git a/docs/EPIC-28_Profile_Training.md b/docs/EPIC-28_Profile_Training.md
Lines changed: 13 additions & 13 deletions
diff --git a/examples/exploitative_play.rs b/examples/exploitative_play.rs
Lines changed: 48 additions & 16 deletions
diff --git a/examples/train_exploit_config.rs b/examples/train_exploit_config.rs
Lines changed: 120 additions & 0 deletions
diff --git a/src/bot/exploit.rs b/src/bot/exploit.rs
Lines changed: 1 addition & 0 deletions
diff --git a/src/bot/mod.rs b/src/bot/mod.rs
Lines changed: 2 additions & 0 deletions
@@ -44,6 +44,10 @@ player-stats = []
 ## `YamlPlayerStatsStore` (one YAML file per player Uuid). Off by default; opt
 ## in for multi-session experiments. EPIC-26 Phase 4.
 player-stats-persistence = ["player-stats", "dep:serde_yaml_bw"]
+## Enables `ExploitConfig` YAML serialisation and the `ExploitTrainer`
+## gradient-free optimisation loop (EPIC-28). Requires player-stats and
+## bot-profiles; pulls in `serde_yaml_bw` but no external optimiser crate.
+bot-training = ["player-stats", "bot-profiles", "dep:serde_yaml_bw"]
 
 [dependencies]
 bint = "0.1.15"
@@ -106,6 +110,10 @@ required-features = ["bot-profiles", "hand-histories", "player-stats"]
 name = "player_stats_session"
 required-features = ["bot-profiles", "hand-histories", "player-stats-persistence"]
 
+[[example]]
+name = "train_exploit_config"
+required-features = ["bot-training"]
+
 [[test]]
 name = "replay_consistency"
 required-features = ["hand-histories", "bot-profiles"]
@@ -130,6 +138,10 @@ required-features = ["bot-profiles", "hand-histories", "player-stats"]
 name = "player_stats_persistence"
 required-features = ["bot-profiles", "hand-histories", "player-stats-persistence"]
 
+[[test]]
+name = "training_integration"
+required-features = ["bot-training"]
+
 [dev-dependencies]
 serde_yaml_bw = "2.5"
 clap = { version = "4.6", features = ["derive", "unicode"] }
 
@@ -123,6 +123,7 @@ workspace)
 | [EPIC-25](docs/EPIC-25_Range_Frequencies.md) | Range Frequencies — optional per-combo frequency in range strings (`AA:0.5`) | Complete |
 | [EPIC-26](docs/EPIC-26_Player_Stats.md) | Player Action Tracking & Opponent Insights — `PlayerStats` / `StatsRegistry` keyed by `Uuid`, derived ratios (VPIP/PFR/AF/WTSD/c-bet/...), exposed to `BotDecider` (no behavior change), optional persistence | Complete |
 | [EPIC-27](docs/EPIC-27_Exploitative_Decider.md) | Adaptive Bot Framework — `ExploitativeDecider<D>` wrapper that converts opponent stats into runtime profile deviations; `ExploitConfig` with 8 deviation rules; `SimTable::new_with_registry`; demo + smoke tests | Complete |
+| [EPIC-28](docs/EPIC-28_Profile_Training.md) | Cross-Session Profile Training — `ExploitTrainer` (1+λ)-ES loop tunes `ExploitConfig` parameters against a static field; `bot-training` feature; YAML serialisation for trained configs; `train_exploit_config` example | Complete |
 | [FEATURE: Activate Bluff Fields](docs/FEATURE_BotProfile_ActivateBluffFields.md) | Wire `bluff_frequency`, `check_raise_frequency`, `postflop_cbet_frequency` into `RuleBasedDecider` | Complete |
 | [FEATURE: Position-Aware Decisions](docs/FEATURE_BotProfile_PositionAwareDecisions.md) | Route decisions through `Playbook` position-specific `BettingStrategy` | Complete |
 | [FEATURE: BotProfile Type Safety](docs/FEATURE_BotProfile_TypeSafety.md) | `PlayStyle` enum, `Percentage` newtype for frequency fields | Complete |
 
@@ -0,0 +1,16 @@
+fold_to_cbet_high_threshold: 0.7901468557501172
+fold_to_cbet_low_threshold: 0.5082579817735436
+vpip_calling_station_threshold: 0.8
+pfr_passive_threshold: 0.2269382705448002
+pfr_nit_threshold: 0.05691801021431529
+aggression_factor_threshold: 1.0
+wtsd_threshold: 0.2793460004461263
+three_bet_pct_threshold: 0.0649335383403233
+fold_to_cbet_high_multiplier: 1.4726932942656468
+fold_to_cbet_low_multiplier: 0.42904025856061506
+bluff_vs_station_multiplier: 0.2920890327962754
+bluff_vs_wtsd_multiplier: 0.5601817702892113
+aggression_vs_nit_multiplier: 0.9644932542068718
+aggression_vs_three_bettor_multiplier: 0.9026762150645852
+min_hands_light: 11
+min_hands_heavy: 21
@@ -4,19 +4,19 @@
 
 | Component | Status |
 |---|---|
-| Feature gate `bot-training` in `Cargo.toml` | ☐ Todo |
-| `ExploitConfig` serde support (YAML serialisation) | ☐ Todo |
-| Parameter encoding/decoding: `ExploitConfig ↔ Vec<f64>` | ☐ Todo |
-| `FitnessEvaluator` — fixed-seed BB/100 evaluation against the field | ☐ Todo |
-| `ExploitTrainer` struct and `train` method | ☐ Todo |
-| `TrainingConfig` struct with iteration budget, replicates, field definition | ☐ Todo |
-| `TrainingResult` struct with convergence history and per-opponent breakdown | ☐ Todo |
-| Module wiring: `src/bot/training/mod.rs`, `src/bot/mod.rs`, `src/prelude.rs` | ☐ Todo |
-| Unit tests: parameter round-trip, fitness monotonicity, default-config is valid | ☐ Todo |
-| Integration test: trainer improves over baseline on a 200-generation run | ☐ Todo |
-| Example: `examples/train_exploit_config.rs` — end-to-end training run | ☐ Todo |
-| Checked-in trained configs: `data/exploit_configs/tag_trained.yaml` | ☐ Todo |
-| `ROADMAP.md` Epics row | ☐ Todo |
+| Feature gate `bot-training` in `Cargo.toml` | ✅ Done |
+| `ExploitConfig` serde support (YAML serialisation) | ✅ Done |
+| Parameter encoding/decoding: `ExploitConfig ↔ Vec<f64>` | ✅ Done |
+| `FitnessEvaluator` — BB/100 evaluation against the field | ✅ Done |
+| `ExploitTrainer` struct and `train` method | ✅ Done |
+| `TrainingConfig` struct with iteration budget, replicates, field definition | ✅ Done |
+| `TrainingResult` struct with convergence history and per-opponent breakdown | ✅ Done |
+| Module wiring: `src/bot/training/mod.rs`, `src/bot/mod.rs`, `src/prelude.rs` | ✅ Done |
+| Unit tests: parameter round-trip, fitness monotonicity, default-config is valid | ✅ Done |
+| Integration test: trainer improves over baseline on a 200-generation run | ✅ Done |
+| Example: `examples/train_exploit_config.rs` — end-to-end training run | ✅ Done |
+| Checked-in trained configs: `data/exploit_configs/tag_trained.yaml` | ✅ Done |
+| `ROADMAP.md` Epics row | ✅ Done |
 
 ---
 
 
@@ -168,15 +168,16 @@ struct MatchResult {
 fn run_match(opponent_profile: BotProfile, opponent_name: &str) -> MatchResult {
     let exploit_player = PlayerNoCell::new_with_chips("TAG_exploit".to_string(), STARTING_CHIPS);
     let opp_player = PlayerNoCell::new_with_chips(opponent_name.to_string(), STARTING_CHIPS);
-    let seats = SeatsNoCell::new(vec![
-        SeatNoCell::new(exploit_player),
-        SeatNoCell::new(opp_player),
-    ]);
+    let seats = SeatsNoCell::new(vec![SeatNoCell::new(exploit_player), SeatNoCell::new(opp_player)]);
     let table = TableNoCell::nlh_from_seats(seats, ForcedBets::new(SB, BB));
 
     let telem = Arc::new(TelemetryDecider::new(ExploitConfig::default()));
     let bots: Vec<(u8, BotProfile, Box<dyn BotDecider>)> = vec![
-        (0, BotProfile::tight_aggressive(), Box::new(SharedTelemetry(Arc::clone(&telem)))),
+        (
+            0,
+            BotProfile::tight_aggressive(),
+            Box::new(SharedTelemetry(Arc::clone(&telem))),
+        ),
         (1, opponent_profile, Box::new(RuleBasedDecider)),
     ];
 
@@ -187,7 +188,11 @@ fn run_match(opponent_profile: BotProfile, opponent_name: &str) -> MatchResult {
     let opponent_delta = result.net_chips.get(&1).copied().unwrap_or(0);
     let counters = std::mem::take(&mut *telem.counters.lock().expect("lock not poisoned"));
 
-    MatchResult { exploit_delta, opponent_delta, counters }
+    MatchResult {
+        exploit_delta,
+        opponent_delta,
+        counters,
+    }
 }
 
 // ── Printing helpers ──────────────────────────────────────────────────────────
@@ -210,14 +215,38 @@ fn print_match(name: &str, r: &MatchResult) {
     );
     println!("│");
     println!("│  Rule firings across {HANDS} decisions:");
-    row("fold_to_cbet > 60%    c-bet more (opp folds flops)    ", r.counters.fold_to_cbet_high);
-    row("fold_to_cbet < 30%    c-bet less (opp is sticky)      ", r.counters.fold_to_cbet_low);
-    row("VPIP > 40%            bluff less (calling station)    ", r.counters.calling_station_bluff);
-    row("VPIP > 40% + PFR < 10% size up for value              ", r.counters.loose_passive_sizing);
-    row("PFR < 8%              less aggr (tight 3-bet range)   ", r.counters.nit_aggression);
-    row("AF > 4.0              widen calldown (aggro opp)      ", r.counters.aggro_calldown);
-    row("WTSD > 35%            bluff less (goes to showdown)   ", r.counters.wtsd_bluff);
-    row("3-bet% > 12%          less 4-bet bluff (freq 3-bettor)", r.counters.three_bettor_aggression);
+    row(
+        "fold_to_cbet > 60%    c-bet more (opp folds flops)    ",
+        r.counters.fold_to_cbet_high,
+    );
+    row(
+        "fold_to_cbet < 30%    c-bet less (opp is sticky)      ",
+        r.counters.fold_to_cbet_low,
+    );
+    row(
+        "VPIP > 40%            bluff less (calling station)    ",
+        r.counters.calling_station_bluff,
+    );
+    row(
+        "VPIP > 40% + PFR < 10% size up for value              ",
+        r.counters.loose_passive_sizing,
+    );
+    row(
+        "PFR < 8%              less aggr (tight 3-bet range)   ",
+        r.counters.nit_aggression,
+    );
+    row(
+        "AF > 4.0              widen calldown (aggro opp)      ",
+        r.counters.aggro_calldown,
+    );
+    row(
+        "WTSD > 35%            bluff less (goes to showdown)   ",
+        r.counters.wtsd_bluff,
+    );
+    row(
+        "3-bet% > 12%          less 4-bet bluff (freq 3-bettor)",
+        r.counters.three_bettor_aggression,
+    );
     println!("│");
     println!("│  Total rule applications: {}", r.counters.total());
     println!("└────────────────────────────────────────────────────────────────────");
@@ -234,14 +263,17 @@ fn row(label: &str, count: u64) {
 fn main() {
     println!();
     println!("EPIC-27 — ExploitativeDecider heads-up sessions");
-    println!("  {HANDS} hands · 50/{BB} blinds · {} billion chips each", STARTING_CHIPS / 1_000_000_000);
+    println!(
+        "  {HANDS} hands · 50/{BB} blinds · {} billion chips each",
+        STARTING_CHIPS / 1_000_000_000
+    );
     println!("  Rule counters = decisions where that rule was active");
     println!();
 
     for (name, profile) in [
         ("LoosePassive", BotProfile::loose_passive()),
         ("TightPassive", BotProfile::tight_passive()),
-        ("Maniac",       BotProfile::maniac()),
+        ("Maniac", BotProfile::maniac()),
     ] {
         let r = run_match(profile, name);
         print_match(name, &r);
 
@@ -0,0 +1,120 @@
+//! EPIC-28 demo — train an `ExploitConfig` via (1+λ)-ES against the default field.
+//!
+//! Runs gradient-free optimisation of `ExploitConfig` parameters, printing
+//! per-generation progress and saving the best config to YAML.
+//!
+//! Run with:
+//! ```text
+//! cargo run --features bot-training --example train_exploit_config
+//! cargo run --features bot-training --example train_exploit_config -- --output data/exploit_configs/custom.yaml
+//! cargo run --features bot-training --example train_exploit_config -- --generations 50 --hands 200
+//! ```
+
+use std::path::PathBuf;
+
+use pkcore::bot::exploit::ExploitConfig;
+use pkcore::bot::training::{ExploitTrainer, TrainingConfig};
+
+const DEFAULT_OUTPUT: &str = "data/exploit_configs/tag_trained.yaml";
+
+/// Runs the demo: parses the CLI flags, trains starting from
+/// `ExploitConfig::default()`, prints a thinned convergence table, and saves
+/// the best config as YAML.
+///
+/// A failure to save is reported on stderr as a warning; the rest of the
+/// summary is still printed.
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let (output_path, max_generations, hands_per_eval) = parse_args(&args);
+
+    // Built before the banner so the replicate count shown below is read from
+    // the config that is actually used instead of being repeated by hand.
+    let config = TrainingConfig {
+        max_generations,
+        hands_per_eval,
+        replicates: 3,
+        lambda: 10,
+        ..TrainingConfig::default()
+    };
+
+    println!();
+    println!("EPIC-28 — ExploitConfig training");
+    println!("  Optimizer : (1+λ)-ES, isotropic Gaussian mutation, 1/5 success rule");
+    println!("  Output    : {output_path}");
+    println!("  Generations : {max_generations}");
+    // NOTE(review): the opponent count is still hard-coded here; confirm it
+    // matches the field defined by `TrainingConfig::default()`.
+    println!(
+        "  Hands/eval  : {hands_per_eval} × {} replicates × 8 opponents",
+        config.replicates
+    );
+    println!();
+
+    let trainer = ExploitTrainer::new(config);
+
+    // The two BB/100 labels are 11 characters wide, so both the header and the
+    // numeric rows below use a width of 11 to keep the columns aligned.
+    println!(
+        "{:<6}  {:>11}  {:>11}  {:>8}",
+        "Gen", "Best BB/100", "Mean BB/100", "Sigma"
+    );
+    println!("{}", "─".repeat(44));
+
+    // We call train and print history after completion.
+    // For a live-progress version the trainer would need a callback hook —
+    // a clean extension for EPIC-29 if desired.
+    let result = trainer.train(&ExploitConfig::default());
+
+    // Print every 10th generation plus the final one.
+    for rec in &result.history {
+        let is_last = rec.generation + 1 == result.generations_run;
+        if rec.generation % 10 == 0 || is_last {
+            println!(
+                "{:<6}  {:>+11.1}  {:>+11.1}  {:>8.5}",
+                rec.generation, rec.best_bb100, rec.mean_bb100, rec.sigma,
+            );
+        }
+    }
+
+    println!("{}", "─".repeat(44));
+    println!();
+    println!("Training complete after {} generations.", result.generations_run);
+    println!("  Baseline BB/100 : (varies by RNG; see exploitative_play example)");
+    println!("  Best BB/100     : {:+.1}", result.best_fitness);
+    println!();
+
+    // Save the trained config as YAML.
+    match save_yaml(&result.best_config, &output_path) {
+        Ok(()) => println!("Saved trained config to {output_path}"),
+        Err(e) => eprintln!("Warning: could not save config: {e}"),
+    }
+
+    println!();
+    println!("To use the trained config in exploitative_play:");
+    println!("  See ExploitativeDecider::wrap_with_config and");
+    println!("  serde_yaml_bw::from_str for YAML loading.");
+}
+
+/// Writes `config` to the file at `path` as YAML.
+///
+/// Any missing parent directories of `path` are created before the write.
+///
+/// # Errors
+///
+/// Returns the serialisation error, or the I/O error from creating the
+/// directories or writing the file, boxed as `dyn Error`.
+fn save_yaml(config: &ExploitConfig, path: &str) -> Result<(), Box<dyn std::error::Error>> {
+    let target = PathBuf::from(path);
+    let serialised = serde_yaml_bw::to_string(config)?;
+
+    target
+        .parent()
+        .map(std::fs::create_dir_all)
+        .transpose()?;
+
+    Ok(std::fs::write(&target, serialised)?)
+}
+
+/// Parses `--output PATH`, `--generations N`, and `--hands N` from `args`.
+///
+/// `args[0]` (the program name) is skipped. Returns `(output, generations,
+/// hands)`, starting from `DEFAULT_OUTPUT`, 100 generations and 300 hands.
+/// Each recognised flag consumes the argument that follows it as its value.
+///
+/// Parsing never fails: an unparsable number, a flag with no value, or an
+/// unrecognised argument leaves the current value in place. Each such case is
+/// reported on stderr so a mistyped flag is not silently ignored.
+fn parse_args(args: &[String]) -> (String, usize, usize) {
+    let mut output = DEFAULT_OUTPUT.to_string();
+    let mut generations = 100_usize;
+    let mut hands = 300_usize;
+
+    let mut rest = args.iter().skip(1);
+    while let Some(flag) = rest.next() {
+        if !matches!(flag.as_str(), "--output" | "--generations" | "--hands") {
+            eprintln!("Warning: ignoring unrecognised argument `{flag}`");
+            continue;
+        }
+        // A recognised flag in last position has nothing left to consume.
+        let Some(value) = rest.next() else {
+            eprintln!("Warning: `{flag}` expects a value; keeping the current setting");
+            break;
+        };
+        match flag.as_str() {
+            "--output" => output = value.clone(),
+            "--generations" => generations = parse_count(flag, value, generations),
+            _ => hands = parse_count(flag, value, hands),
+        }
+    }
+    (output, generations, hands)
+}
+
+/// Parses `value` as the count for `flag`; on failure warns on stderr and
+/// returns `fallback` unchanged.
+fn parse_count(flag: &str, value: &str, fallback: usize) -> usize {
+    value.parse().unwrap_or_else(|_| {
+        eprintln!("Warning: `{value}` is not a valid number for `{flag}`; using {fallback}");
+        fallback
+    })
+}
@@ -32,6 +32,7 @@ use crate::bot::table_snapshot::{SeatInfo, TableSnapshot};
 /// assert!(cfg.min_hands_light < cfg.min_hands_heavy);
 /// ```
 #[derive(Clone, Debug, PartialEq)]
+#[cfg_attr(feature = "bot-training", derive(serde::Serialize, serde::Deserialize))]
 pub struct ExploitConfig {
     /// Fold-to-c-bet rate above which we c-bet more aggressively.
     pub fold_to_cbet_high_threshold: f64,
 
@@ -15,4 +15,6 @@ pub mod range_strategy;
 pub mod sim;
 pub mod table_size;
 pub mod table_snapshot;
+#[cfg(feature = "bot-training")]
+pub mod training;
 pub mod weighted_range;