Skip to content

Commit b963c09

Browse files
committed
feat: add multi-variant convergence and Pareto comparison visualizations
1 parent b3c6be4 commit b963c09

2 files changed

Lines changed: 332 additions & 4 deletions

File tree

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import os
2+
import sys
3+
import random
4+
import hashlib
5+
import numpy as np
6+
import pandas as pd
7+
import time
8+
from datetime import datetime
9+
10+
# Add project root to sys.path
11+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12+
13+
from src.config import (
14+
ASSET_NAMES,
15+
DATA_FOLDER,
16+
OUTPUT_FOLDER,
17+
TRAINING_ITERATIONS,
18+
NUM_MIXTURES,
19+
METRIC_TYPE,
20+
)
21+
from src.data_handler import load_data
22+
from src.predictors import ExactGPPredictor
23+
from src.portfolio import (
24+
calculate_expected_returns_and_cov,
25+
EpsilonConstrainedOptimizer,
26+
MOEADOptimizer,
27+
MOEADDRAOptimizer,
28+
MOEADAWAOptimizer,
29+
)
30+
from src.portfolio.metrics import calculate_igd, calculate_hypervolume
31+
from src.visualization import (
32+
plot_variants_convergence,
33+
plot_variants_comparison,
34+
)
35+
36+
37+
def get_data_hash(folder_path):
    """Return a SHA-256 hex digest of every file's contents under *folder_path*.

    The tree is walked top-down and the raw bytes of each file are fed into a
    single hasher. Only file *contents* contribute to the digest; file and
    directory names do not, so renaming a file without changing the visiting
    order leaves the digest unchanged.

    Args:
        folder_path: Root directory whose files are hashed recursively.

    Returns:
        The hexadecimal SHA-256 digest as a string. A missing or empty folder
        yields the digest of empty input, because ``os.walk`` ignores listing
        errors by default.
    """
    hasher = hashlib.sha256()
    for root, dirs, files in os.walk(folder_path):
        # os.walk lists entries in arbitrary filesystem order. Sorting ``dirs``
        # in place fixes the order in which subdirectories are visited, so the
        # same data produces the same digest on every machine.
        dirs.sort()
        for name in sorted(files):
            file_path = os.path.join(root, name)
            with open(file_path, "rb") as f:
                # Stream in fixed-size chunks so large files are never loaded
                # into memory at once.
                while chunk := f.read(8192):
                    hasher.update(chunk)
    return hasher.hexdigest()
47+
48+
49+
def seed_everything(seed=42):
    """Seed the random number generators used by the experiment.

    Seeds Python's ``random`` module and NumPy's global generator. If PyTorch
    can be imported, its generator is seeded as well; otherwise that step is
    skipped.

    Args:
        seed: Integer seed applied to every generator.
    """
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)

    try:
        import torch
        torch.manual_seed(seed)
    except ImportError:
        # PyTorch is optional here; without it there is nothing more to seed.
        pass
57+
58+
59+
def run_comparative_experiment():
    """Benchmark the MOEA/D optimizer variants against an ECM reference front.

    The experiment:
      1. Seeds the random generators and creates a timestamped run folder
         under ``OUTPUT_FOLDER``.
      2. Loads the asset data and derives expected returns and a covariance
         matrix from the predictor, for a timestamp 100.0 past the last sample
         of the first asset.
      3. Builds a 100-point reference front with the epsilon-constrained
         optimizer.
      4. Runs each MOEA/D variant 5 times (100 points, 200 generations) and
         scores every recorded generation and each final front with the metric
         selected by ``METRIC_TYPE`` (hypervolume, otherwise IGD).
      5. Writes a convergence plot, a Pareto comparison plot, a summary CSV and
         one CSV per variant holding the front of that variant's last run.

    Returns:
        None. All results are written to the run folder and progress is
        printed to stdout.
    """
    seed_everything(42)

    # Setup folders
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S_moead_variants_comp")
    run_folder = os.path.join(OUTPUT_FOLDER, run_id)
    os.makedirs(run_folder, exist_ok=True)

    print(f"Starting MOEA/D Variants Comparison. Results will be saved to {run_folder}")

    # 1. Load data and calculate expected returns/cov
    data_hash = get_data_hash(DATA_FOLDER)
    raw_data_dict = load_data(DATA_FOLDER, ASSET_NAMES, verbose=False)
    predictor = ExactGPPredictor(
        training_iterations=TRAINING_ITERATIONS,
        num_mixtures=NUM_MIXTURES,
        verbose=False,
        global_data_hash=data_hash,
    )

    sample_timestamps = raw_data_dict[ASSET_NAMES[0]]["timestamps"]
    target_timestamp = sample_timestamps[-1] + 100.0  # 100 steps ahead

    expected_returns, cov_matrix = calculate_expected_returns_and_cov(
        raw_data_dict, predictor, target_timestamp, ASSET_NAMES, verbose=False
    )

    data_kwargs = {"expected_returns": expected_returns, "cov_matrix": cov_matrix}

    # 2. Generate Reference Front (2D) using ECM
    print("Generating Reference Front using Epsilon-Constrained Method...")
    ecm = EpsilonConstrainedOptimizer()
    ref_metrics, _ = ecm.generate_pareto_front(100, verbose=False, **data_kwargs)
    ref_front_2d = np.column_stack([ref_metrics["Return"], ref_metrics["Risk"]])

    # 3. Define Variants
    variants = {
        "Standard MOEA/D": MOEADOptimizer,
        "MOEA/D-DRA": MOEADDRAOptimizer,
        "MOEA/D-AWA": MOEADAWAOptimizer,
    }

    num_runs = 5
    pop_size = 100
    gens = 200

    if METRIC_TYPE == "hypervolume":
        metric_func = calculate_hypervolume
        metric_name = "Hypervolume"
    else:
        metric_func = calculate_igd
        metric_name = "IGD"

    results = {}

    # Final metrics storage for summary
    summary_data = []

    for name, opt_class in variants.items():
        print(f"Evaluating {name}...")
        all_metric_histories = []
        all_times = []
        final_metrics_list = []

        for run in range(num_runs):
            print(f" Run {run+1}/{num_runs}...", end="\r")
            # perf_counter is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments during a long run.
            start_time = time.perf_counter()
            optimizer = opt_class()

            # Run optimizer
            metrics, weights, _vectors, history = optimizer.generate_pareto_front(
                num_points=pop_size,
                generations=gens,
                verbose=False,
                record_history=True,
                **data_kwargs
            )
            all_times.append(time.perf_counter() - start_time)

            # Score every recorded generation against the reference front.
            # The first two columns of each history entry are taken as the
            # objectives (Return to maximise, Risk to minimise) -- this relies
            # on the optimizer's history layout, which is defined elsewhere.
            run_metric_history = [
                metric_func(gen_f[:, :2], ref_front_2d) for gen_f in history
            ]
            all_metric_histories.append(run_metric_history)

            # Calculate final metrics for this run
            final_front = np.column_stack([metrics["Return"], metrics["Risk"]])
            final_metrics_list.append(metric_func(final_front, ref_front_2d))

        print(f" {name} Finished. Avg Time: {np.mean(all_times):.2f}s")

        # ``metrics`` and ``weights`` still hold the values of the last run;
        # that front is the one plotted and exported for this variant.
        results[name] = {
            "metrics": metrics,
            "weights": weights,
            "history_mean": np.mean(all_metric_histories, axis=0),
            "history_std": np.std(all_metric_histories, axis=0),
            "avg_time": np.mean(all_times),
            "std_time": np.std(all_times),
            "avg_metric": np.mean(final_metrics_list),
            "std_metric": np.std(final_metrics_list),
        }

        summary_data.append({
            "Variant": name,
            f"Avg {metric_name}": results[name]["avg_metric"],
            f"Std {metric_name}": results[name]["std_metric"],
            "Avg Time (s)": results[name]["avg_time"],
            "Std Time (s)": results[name]["std_time"]
        })

    # 4. Generate Visualizations
    print("\nGenerating comparative visualizations...")

    # Convergence Plot
    histories_dict = {
        name: (res["history_mean"], res["history_std"])
        for name, res in results.items()
    }
    plot_variants_convergence(
        histories_dict,
        metric_name=metric_name,
        save_path=os.path.join(run_folder, "convergence_comparison.png")
    )

    # Pareto Front Comparison
    variants_fronts = {
        name: res["metrics"] for name, res in results.items()
    }
    plot_variants_comparison(
        ref_metrics,
        variants_fronts,
        save_path=os.path.join(run_folder, "pareto_comparison.png")
    )

    # 5. Export Results
    print("Exporting results...")
    df_summary = pd.DataFrame(summary_data)
    df_summary.to_csv(os.path.join(run_folder, "metrics_comparison.csv"), index=False)

    # Export Pareto fronts for each variant
    for name, res in results.items():
        # "/" and spaces are replaced so the variant name is a valid file name.
        safe_name = name.lower().replace("/", "_").replace(" ", "_")
        front_metrics = res["metrics"]
        front_weights = res["weights"]
        num_points = len(next(iter(front_metrics.values())))
        records = []
        for i in range(num_points):
            # One row per front point: its metric values followed by the
            # portfolio weight of each asset.
            rec = {m_name: m_vals[i] for m_name, m_vals in front_metrics.items()}
            for j, w in enumerate(front_weights[i]):
                rec[ASSET_NAMES[j]] = w
            records.append(rec)
        df_front = pd.DataFrame(records)
        df_front.to_csv(os.path.join(run_folder, f"pareto_front_{safe_name}.csv"), index=False)

    print(f"\nExperiment finished successfully. All results are in: {run_folder}")
229+
230+
231+
# Script entry point: run the comparison only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_comparative_experiment()

src/visualization.py

Lines changed: 100 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,22 @@ def plot_pareto_front(risks, returns, save_path=None):
7878
plt.savefig(save_path)
7979

8080

81-
def plot_moead_convergence(mean_history, std_history=None, metric_name="IGD", save_path=None):
    """Plot the mean convergence curve of a MOEA/D run, with an optional band.

    Args:
        mean_history: Per-generation mean metric values (any sequence or
            array).
        std_history: Optional per-generation standard deviations. When given,
            a shaded band of ``mean +/- std`` is drawn around the curve.
        metric_name: Metric label used in the title and on the y-axis.
        save_path: If truthy, the figure is written to this path.
    """
    # Local import keeps this function self-contained.
    import numpy as np

    plt.figure(figsize=(10, 6))
    plt.plot(mean_history, label="Mean")
    if std_history is not None:
        # Plain Python lists do not support element-wise ``-``/``+``, so
        # convert before computing the band edges.
        mean_arr = np.asarray(mean_history)
        std_arr = np.asarray(std_history)
        plt.fill_between(
            range(len(mean_arr)),
            mean_arr - std_arr,
            mean_arr + std_arr,
            alpha=0.2,
            label="Std Dev"
        )
    plt.title(f"MOEA/D Convergence ({metric_name})")
    plt.xlabel("Generation")
    plt.ylabel(metric_name)
    plt.legend()
    plt.grid(True, alpha=0.3)
    if save_path:
        plt.savefig(save_path)
9099

@@ -305,3 +314,90 @@ def plot_moead_sensitivity_heatmap(
305314

306315
if save_path:
307316
plt.savefig(save_path)
317+
318+
319+
def plot_variants_convergence(
    histories_dict, metric_name="IGD", save_path=None
):
    """
    Plots convergence history for multiple algorithm variants.

    Each variant is drawn as a line in its own colour; colours repeat after
    five variants. When a standard deviation history is supplied, a shaded
    band of ``mean +/- std`` is drawn in the same colour.

    Args:
        histories_dict: Dict mapping {name: (mean_history, std_history)};
            ``std_history`` may be None to skip the band.
        metric_name: Name of the metric (e.g., "IGD")
        save_path: Path to save the plot
    """
    # Local import keeps this function self-contained.
    import numpy as np

    plt.figure(figsize=(12, 7))
    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]

    for i, (name, (mean, std)) in enumerate(histories_dict.items()):
        color = colors[i % len(colors)]
        plt.plot(mean, label=name, linewidth=2, color=color)
        if std is not None:
            # Plain Python lists do not support element-wise ``-``/``+``, so
            # convert before computing the band edges.
            mean_arr = np.asarray(mean)
            std_arr = np.asarray(std)
            plt.fill_between(
                range(len(mean_arr)),
                mean_arr - std_arr,
                mean_arr + std_arr,
                alpha=0.15,
                color=color
            )

    plt.title(f"Convergence Comparison ({metric_name})")
    plt.xlabel("Generation")
    plt.ylabel(metric_name)
    plt.legend()
    plt.grid(True, alpha=0.3)
    if save_path:
        plt.savefig(save_path)
352+
353+
354+
def plot_variants_comparison(
    ref_metrics, variants_results, save_path=None
):
    """
    Draws the Pareto fronts of several variants on one scatter plot.

    Return is plotted on the x-axis and Risk on the y-axis. The reference
    front, when provided, is drawn first as faint black crosses; every variant
    is then drawn on top as coloured circles. Colours repeat after five
    variants.

    Args:
        ref_metrics: Metrics dict for the reference front (ECM), or None to
            omit the reference.
        variants_results: Dict mapping {name: metrics_dict}
        save_path: Path to save the plot
    """
    palette = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]
    reference_style = dict(
        c="black", marker="x", s=30, label="Reference (ECM)", alpha=0.4, zorder=1
    )
    variant_style = dict(
        marker="o", s=40, alpha=0.6, edgecolors="white", linewidth=0.5, zorder=2
    )

    plt.figure(figsize=(12, 8))

    # Reference front sits underneath the variants (lower zorder).
    if ref_metrics is not None:
        plt.scatter(ref_metrics["Return"], ref_metrics["Risk"], **reference_style)

    # One scatter layer per variant.
    for idx, (variant_name, front) in enumerate(variants_results.items()):
        plt.scatter(
            front["Return"],
            front["Risk"],
            label=variant_name,
            color=palette[idx % len(palette)],
            **variant_style
        )

    plt.title("Pareto Front Comparison: MOEA/D Variants")
    plt.xlabel("Expected Return")
    plt.ylabel("Expected Risk")
    plt.legend()
    plt.grid(True, alpha=0.3)
    if save_path:
        plt.savefig(save_path)

0 commit comments

Comments
 (0)