Skip to content

Commit 823ea2c

Browse files
committed
style: resolve static analysis and formatting issues for CI
1 parent e6c591a commit 823ea2c

10 files changed

Lines changed: 340 additions & 81 deletions

File tree

.gemini/GEMINI.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# GitHub Interactions and CI Protocol
2+
3+
When interacting with Git and GitHub (committing, branching, updating code, and pushing), you must strictly follow the `git-commit-protocol` skill.
4+
5+
## Key Rules for GitHub Interactions
6+
1. **Follow Conventional Commits:** All your commit messages must follow the standard `<type>: <description>` format defined in the protocol.
7+
2. **Atomic Commits:** Make sure your commits contain single logical units of work.
8+
3. **Pre-Push CI Verification (CRITICAL):** Before executing `git push`, always inspect the local repository's CI workflow files (e.g., `.github/workflows/ci.yml`). Identify the commands the CI runs (such as `ruff format --check .`, `ruff check .`, `pytest`, etc.) and **run them locally**. If any step fails, you must fix the code to resolve the linting or test failures before pushing to the remote repository.

scripts/evaluate_predictions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
)
2424

2525
# Configuration overrides for evaluation
26-
TRAIN_DIR = "data/Bundle1"
27-
EVAL_DIR = "data/Bundle2"
26+
TRAIN_DIR = "data/Bundle2"
27+
EVAL_DIR = "data/Bundle3"
2828
OUTPUT_DIR = "output"
2929

3030

scripts/experiment_moead.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ def run_experiment():
121121
metric_vals = []
122122
for run in range(num_runs):
123123
moead = MOEADOptimizer()
124-
metrics, _ = moead.generate_pareto_front(
124+
metrics, _, _ = moead.generate_pareto_front(
125125
num_points=pop_size, generations=gens, verbose=False, **data_kwargs
126126
)
127127
obt_front = np.column_stack([metrics["Return"], metrics["Risk"]])
@@ -160,7 +160,7 @@ def run_experiment():
160160

161161
for run in range(num_runs):
162162
moead = MOEADOptimizer()
163-
metrics, _, history = moead.generate_pareto_front(
163+
metrics, _, _, history = moead.generate_pareto_front(
164164
num_points=pop_size,
165165
generations=gens,
166166
verbose=False,
@@ -187,12 +187,15 @@ def run_experiment():
187187

188188
# 5. Final Population Comparison (2D)
189189
print("Comparing Final Populations...")
190-
moead_metrics, moead_weights = moead.generate_pareto_front(
190+
moead_metrics, moead_weights_assets, weight_vectors = moead.generate_pareto_front(
191191
num_points=100, generations=200, verbose=False, **data_kwargs
192192
)
193193
plot_moead_2d_comparison(
194194
ref_metrics,
195195
moead_metrics,
196+
weight_vectors=weight_vectors,
197+
z_ideal=moead.z_ideal,
198+
z_nadir=moead.z_nadir,
196199
save_path=os.path.join(run_folder, "2d_comparison.png"),
197200
)
198201
print("Comparison plot saved.")
@@ -202,7 +205,7 @@ def run_experiment():
202205
num_points_2d = len(next(iter(moead_metrics.values())))
203206
for idx in range(num_points_2d):
204207
record = {name: values[idx] for name, values in moead_metrics.items()}
205-
for j, weight in enumerate(moead_weights[idx]):
208+
for j, weight in enumerate(moead_weights_assets[idx]):
206209
record[ASSET_NAMES[j]] = weight
207210
records_2d.append(record)
208211
df_pareto_2d = pd.DataFrame(records_2d)
@@ -219,7 +222,7 @@ def run_experiment():
219222
)
220223
moead_3d = MOEADOptimizer(problem=prob_3d)
221224

222-
metrics_3d, weights_3d = moead_3d.generate_pareto_front(
225+
metrics_3d, weights_3d, _ = moead_3d.generate_pareto_front(
223226
num_points=150, generations=300, verbose=False, **data_kwargs
224227
)
225228
plot_moead_3d_pareto(

scripts/experiment_repair_bias.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
import os
4+
import sys
5+
6+
# Add the project root to sys.path so we can import src
7+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8+
9+
from src.portfolio.moead import MOEADOptimizer
10+
11+
12+
def proj_simple_normalization(weights):
13+
"""
14+
Project weights by clipping negatives to 0 and normalizing the sum.
15+
"""
16+
weights = np.maximum(weights, 0)
17+
s = np.sum(weights)
18+
if s == 0:
19+
return np.ones(len(weights)) / len(weights)
20+
return weights / s
21+
22+
23+
def map_to_2d(weights):
24+
w2 = weights[:, 1]
25+
w3 = weights[:, 2]
26+
27+
x = w2 + 0.5 * w3
28+
y = (np.sqrt(3.0) / 2.0) * w3
29+
return x, y
30+
31+
32+
def plot_triangle(ax):
33+
triangle = np.array([[0, 0], [1, 0], [0.5, np.sqrt(3) / 2], [0, 0]])
34+
ax.plot(triangle[:, 0], triangle[:, 1], "k-", lw=1.5)
35+
ax.set_aspect("equal")
36+
ax.axis("off")
37+
38+
39+
def run_simulation():
40+
np.random.seed(42)
41+
N_points = 2000
42+
N_iterations = 200 # Increased to allow operators to reach stationary distribution
43+
44+
# 1. Start with a uniform distribution of points on the 3-simplex
45+
initial_points = np.random.dirichlet(np.ones(3), N_points)
46+
47+
points_euclidean = initial_points.copy()
48+
points_norm = initial_points.copy()
49+
50+
optimizer = MOEADOptimizer(problem=None)
51+
52+
# 2. Iterate
53+
for i in range(N_iterations):
54+
# Create random pairs for crossover
55+
indices = np.random.permutation(N_points)
56+
p1_idx = indices[: N_points // 2]
57+
p2_idx = indices[N_points // 2 :]
58+
59+
# We process Euclidean population
60+
offspring_euclidean = np.empty((N_points, 3))
61+
for j in range(len(p1_idx)):
62+
p1 = points_euclidean[p1_idx[j]]
63+
p2 = points_euclidean[p2_idx[j]]
64+
# Generate two children per pair to keep population size constant
65+
offspring_euclidean[2 * j] = optimizer._sbx_crossover(p1, p2)
66+
# Second child: MOEA/D normally generates a single offspring at random; here we
67+
# simply call the crossover a second time, which keeps the population size constant.
68+
offspring_euclidean[2 * j + 1] = optimizer._sbx_crossover(p1, p2)
69+
70+
for k in range(N_points):
71+
offspring_euclidean[k] = optimizer._polynomial_mutation(
72+
offspring_euclidean[k]
73+
)
74+
# Apply Euclidean projection from MOEAD
75+
points_euclidean[k] = optimizer._repair(offspring_euclidean[k])
76+
77+
# We process Normalization population
78+
offspring_norm = np.empty((N_points, 3))
79+
for j in range(len(p1_idx)):
80+
p1 = points_norm[p1_idx[j]]
81+
p2 = points_norm[p2_idx[j]]
82+
offspring_norm[2 * j] = optimizer._sbx_crossover(p1, p2)
83+
offspring_norm[2 * j + 1] = optimizer._sbx_crossover(p1, p2)
84+
85+
for k in range(N_points):
86+
offspring_norm[k] = optimizer._polynomial_mutation(offspring_norm[k])
87+
points_norm[k] = proj_simple_normalization(offspring_norm[k])
88+
89+
# 3. Visualization
90+
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
91+
92+
# Use hexbin so point density is clearly visible; a plain scatter made the two distributions look identical (the "no difference" plotting issue).
93+
94+
ax = axes[0]
95+
plot_triangle(ax)
96+
x, y = map_to_2d(initial_points)
97+
hb = ax.hexbin(x, y, gridsize=30, cmap="viridis", mincnt=1)
98+
fig.colorbar(hb, ax=ax, label="Count")
99+
ax.set_title("Initial (Uniform)")
100+
101+
ax = axes[1]
102+
plot_triangle(ax)
103+
x, y = map_to_2d(points_euclidean)
104+
hb = ax.hexbin(x, y, gridsize=30, cmap="viridis", mincnt=1)
105+
fig.colorbar(hb, ax=ax, label="Count")
106+
ax.set_title(
107+
f"Euclidean Projection\n(After {N_iterations} iters with SBX & PolyMut)"
108+
)
109+
110+
ax = axes[2]
111+
plot_triangle(ax)
112+
x, y = map_to_2d(points_norm)
113+
hb = ax.hexbin(x, y, gridsize=30, cmap="viridis", mincnt=1)
114+
fig.colorbar(hb, ax=ax, label="Count")
115+
ax.set_title(
116+
f"Simple Normalization\n(After {N_iterations} iters with SBX & PolyMut)"
117+
)
118+
119+
plt.tight_layout()
120+
output_path = os.path.join("plots", "repair_bias_simulation.png")
121+
os.makedirs("plots", exist_ok=True)
122+
plt.savefig(output_path, dpi=150, bbox_inches="tight")
123+
print(f"Result saved to {output_path}")
124+
125+
126+
if __name__ == "__main__":
127+
run_simulation()

scripts/verify_variants.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import os
22
import sys
33
import numpy as np
4-
from datetime import datetime
54

65
# Add project root to sys.path
76
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
87

98
from src.portfolio import (
10-
calculate_expected_returns_and_cov,
119
MOEADOptimizer,
1210
MOEADDRAOptimizer,
1311
MOEADAWAOptimizer,
@@ -16,45 +14,46 @@
1614
RiskObjective,
1715
)
1816

17+
1918
def run_test():
2019
print("Starting MOEA/D Variants Verification Test...")
21-
20+
2221
# Mock data for testing
2322
num_assets = 5
2423
expected_returns = np.array([0.1, 0.15, 0.12, 0.08, 0.11])
2524
cov_matrix = np.eye(num_assets) * 0.05
2625
cov_matrix[0, 1] = cov_matrix[1, 0] = 0.01
27-
28-
data_kwargs = {
29-
"expected_returns": expected_returns,
30-
"cov_matrix": cov_matrix
31-
}
32-
26+
27+
data_kwargs = {"expected_returns": expected_returns, "cov_matrix": cov_matrix}
28+
3329
problem = PortfolioProblem([ReturnObjective(), RiskObjective()])
34-
30+
3531
optimizers = {
3632
"Standard MOEA/D": MOEADOptimizer(problem=problem),
3733
"MOEA/D-DRA": MOEADDRAOptimizer(problem=problem),
38-
"MOEA/D-AWA": MOEADAWAOptimizer(problem=problem)
34+
"MOEA/D-AWA": MOEADAWAOptimizer(problem=problem),
3935
}
40-
36+
4137
for name, opt in optimizers.items():
4238
print(f"\nRunning {name}...")
4339
try:
4440
metrics, weights, w_vectors = opt.generate_pareto_front(
45-
num_points=50,
46-
generations=100,
47-
verbose=False,
48-
**data_kwargs
41+
num_points=50, generations=100, verbose=False, **data_kwargs
4942
)
5043
print(f" {name} completed successfully.")
5144
print(f" Points found: {len(weights)}")
52-
print(f" Return range: [{np.min(metrics['Return']):.4f}, {np.max(metrics['Return']):.4f}]")
53-
print(f" Risk range: [{np.min(metrics['Risk']):.4f}, {np.max(metrics['Risk']):.4f}]")
45+
print(
46+
f" Return range: [{np.min(metrics['Return']):.4f}, {np.max(metrics['Return']):.4f}]"
47+
)
48+
print(
49+
f" Risk range: [{np.min(metrics['Risk']):.4f}, {np.max(metrics['Risk']):.4f}]"
50+
)
5451
except Exception as e:
5552
print(f" {name} FAILED with error: {e}")
5653
import traceback
54+
5755
traceback.print_exc()
5856

57+
5958
if __name__ == "__main__":
6059
run_test()

src/portfolio/moead.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,14 @@ def get_pareto_parameters(self, num_points, **kwargs):
1818
# ── Public interface ──────────────────────────────────────────────────────
1919

2020
def generate_pareto_front(
21-
self, num_points=100, generations=100, T=10, nr=2, verbose=False, crossover_operator="sbx", **kwargs
21+
self,
22+
num_points=100,
23+
generations=100,
24+
T=10,
25+
nr=2,
26+
verbose=False,
27+
crossover_operator="sbx",
28+
**kwargs,
2229
):
2330
"""
2431
MOEA/D with Tchebycheff decomposition, stable normalisation, and
@@ -145,21 +152,27 @@ def normalise(f):
145152

146153
if crossover_operator == "sbx":
147154
# Simulated Binary Crossover (SBX)
148-
offspring = self._sbx_crossover(population[p1_idx], population[p2_idx])
155+
offspring = self._sbx_crossover(
156+
population[p1_idx], population[p2_idx]
157+
)
149158
# CRITICAL: We MUST repair immediately after SBX
150159
offspring = self._repair(offspring)
151-
160+
152161
# Polynomial Mutation
153162
offspring = self._polynomial_mutation(offspring)
154163
# CRITICAL: Repair again because polynomial mutation pushes bounds
155164
offspring = self._repair(offspring)
156-
165+
157166
elif crossover_operator == "simplex":
158167
# Linear mix crossing the full valid line segment on the simplex.
159168
# It natively avoids breaking the simplex geometry.
160-
offspring = self._simplex_crossover(population[p1_idx], population[p2_idx])
169+
offspring = self._simplex_crossover(
170+
population[p1_idx], population[p2_idx]
171+
)
161172
else:
162-
raise ValueError(f"Unknown crossover operator: {crossover_operator}")
173+
raise ValueError(
174+
f"Unknown crossover operator: {crossover_operator}"
175+
)
163176

164177
# Evaluate offspring.
165178
off_f = np.array(
@@ -199,5 +212,10 @@ def normalise(f):
199212
}
200213

201214
if kwargs.get("record_history", False):
202-
return pareto_metrics, np.array(population), weight_vectors, np.array(history)
215+
return (
216+
pareto_metrics,
217+
np.array(population),
218+
weight_vectors,
219+
np.array(history),
220+
)
203221
return pareto_metrics, np.array(population), weight_vectors

0 commit comments

Comments
 (0)