|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import random |
| 4 | +import hashlib |
| 5 | +import numpy as np |
| 6 | +import pandas as pd |
| 7 | +import time |
| 8 | +from datetime import datetime |
| 9 | + |
| 10 | +# Add project root to sys.path |
| 11 | +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 12 | + |
| 13 | +from src.config import ( |
| 14 | + ASSET_NAMES, |
| 15 | + DATA_FOLDER, |
| 16 | + OUTPUT_FOLDER, |
| 17 | + TRAINING_ITERATIONS, |
| 18 | + NUM_MIXTURES, |
| 19 | + METRIC_TYPE, |
| 20 | +) |
| 21 | +from src.data_handler import load_data |
| 22 | +from src.predictors import ExactGPPredictor |
| 23 | +from src.portfolio import ( |
| 24 | + calculate_expected_returns_and_cov, |
| 25 | + EpsilonConstrainedOptimizer, |
| 26 | + MOEADOptimizer, |
| 27 | + MOEADDRAOptimizer, |
| 28 | + MOEADAWAOptimizer, |
| 29 | +) |
| 30 | +from src.portfolio.metrics import calculate_igd, calculate_hypervolume |
| 31 | +from src.visualization import ( |
| 32 | + plot_variants_convergence, |
| 33 | + plot_variants_comparison, |
| 34 | +) |
| 35 | + |
| 36 | + |
def get_data_hash(folder_path):
    """Return a SHA-256 hex digest of the contents of every file under a folder.

    The folder is walked recursively and the raw bytes of each file are fed
    into a single hasher. Only file *contents* contribute to the digest; file
    and directory names influence it solely through the order in which files
    are read.

    Args:
        folder_path: Root directory to hash.

    Returns:
        The hexadecimal SHA-256 digest as a string. A folder without files
        (or a path ``os.walk`` cannot list) yields the digest of empty input.
    """
    hasher = hashlib.sha256()
    for root, dirs, files in os.walk(folder_path):
        # os.walk reports entries in arbitrary filesystem order. Sorting
        # ``dirs`` in place fixes the order in which sub-directories are
        # visited, so identical data yields the same digest on every machine.
        dirs.sort()
        for name in sorted(files):
            with open(os.path.join(root, name), "rb") as f:
                # Read in fixed-size chunks so large files are never loaded
                # into memory at once.
                while chunk := f.read(8192):
                    hasher.update(chunk)
    return hasher.hexdigest()
| 47 | + |
| 48 | + |
def seed_everything(seed=42):
    """Seed the random number generators used by the experiment.

    Seeds Python's ``random`` module and NumPy's global generator. If PyTorch
    can be imported, its generator is seeded as well; otherwise that step is
    skipped silently.

    Args:
        seed: Integer seed applied to every generator.
    """
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)

    # torch is an optional dependency: seed it only when it is installed.
    try:
        from torch import manual_seed as seed_torch
        seed_torch(seed)
    except ImportError:
        pass
| 57 | + |
| 58 | + |
def _evaluate_variant(name, opt_class, data_kwargs, ref_front_2d, metric_func,
                      num_runs, pop_size, gens):
    """Run one optimizer variant ``num_runs`` times and aggregate its statistics.

    Returns a dict holding the metrics/weights of the last run together with
    the mean/std of the per-generation metric history, of the wall time and of
    the final metric value across runs.
    """
    all_metric_histories = []
    all_times = []
    final_metrics_list = []
    metrics = weights = None

    for run in range(num_runs):
        print(f"  Run {run+1}/{num_runs}...", end="\r")
        # perf_counter is monotonic and high-resolution, so the measured
        # duration cannot be distorted by system clock adjustments.
        start_time = time.perf_counter()
        optimizer = opt_class()
        metrics, weights, _vectors, history = optimizer.generate_pareto_front(
            num_points=pop_size,
            generations=gens,
            verbose=False,
            record_history=True,
            **data_kwargs,
        )
        all_times.append(time.perf_counter() - start_time)

        # Each history entry holds the objective values of one generation;
        # the first two columns are taken as (Return, Risk) to match the
        # column order of the reference front.
        # NOTE(review): column meaning is assumed from the optimizer's
        # history format - confirm against src.portfolio.
        all_metric_histories.append(
            [metric_func(gen_f[:, :2], ref_front_2d) for gen_f in history]
        )

        final_front = np.column_stack([metrics["Return"], metrics["Risk"]])
        final_metrics_list.append(metric_func(final_front, ref_front_2d))

    print(f"  {name} Finished. Avg Time: {np.mean(all_times):.2f}s")

    return {
        # Front of the last run, used for the comparison plot and CSV export.
        "metrics": metrics,
        "weights": weights,
        "history_mean": np.mean(all_metric_histories, axis=0),
        "history_std": np.std(all_metric_histories, axis=0),
        "avg_time": np.mean(all_times),
        "std_time": np.std(all_times),
        "avg_metric": np.mean(final_metrics_list),
        "std_metric": np.std(final_metrics_list),
    }


def _export_pareto_front(name, res, run_folder):
    """Write the final Pareto front of one variant (metrics + weights) to CSV."""
    safe_name = name.lower().replace("/", "_").replace(" ", "_")
    metrics = res["metrics"]
    weights = res["weights"]
    num_points = len(next(iter(metrics.values())))

    records = []
    for i in range(num_points):
        rec = {m_name: m_vals[i] for m_name, m_vals in metrics.items()}
        # One column per asset, in ASSET_NAMES order.
        for j, w in enumerate(weights[i]):
            rec[ASSET_NAMES[j]] = w
        records.append(rec)

    df_front = pd.DataFrame(records)
    df_front.to_csv(
        os.path.join(run_folder, f"pareto_front_{safe_name}.csv"), index=False
    )


def run_comparative_experiment(num_runs=5, pop_size=100, gens=200, seed=42):
    """Compare MOEA/D variants against an epsilon-constrained reference front.

    Steps:
      1. Load the asset data and estimate expected returns / covariance with
         the GP predictor.
      2. Build a 2D (Return, Risk) reference front with the
         epsilon-constrained optimizer.
      3. Run every MOEA/D variant ``num_runs`` times, scoring each recorded
         generation and the final front with the metric selected by
         ``METRIC_TYPE`` (hypervolume, otherwise IGD).
      4. Save convergence and Pareto-front comparison plots.
      5. Export a summary CSV and one Pareto-front CSV per variant.

    All artefacts are written to a timestamped sub-folder of ``OUTPUT_FOLDER``.

    Args:
        num_runs: Independent repetitions per variant (must be >= 1).
        pop_size: Number of points requested from each MOEA/D optimizer.
        gens: Number of generations per optimizer run.
        seed: Seed passed to ``seed_everything`` before the experiment.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    seed_everything(seed)

    # Setup folders
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S_moead_variants_comp")
    run_folder = os.path.join(OUTPUT_FOLDER, run_id)
    os.makedirs(run_folder, exist_ok=True)

    print(f"Starting MOEA/D Variants Comparison. Results will be saved to {run_folder}")

    # 1. Load data and calculate expected returns/cov
    data_hash = get_data_hash(DATA_FOLDER)
    raw_data_dict = load_data(DATA_FOLDER, ASSET_NAMES, verbose=False)
    predictor = ExactGPPredictor(
        training_iterations=TRAINING_ITERATIONS,
        num_mixtures=NUM_MIXTURES,
        verbose=False,
        global_data_hash=data_hash,
    )

    sample_timestamps = raw_data_dict[ASSET_NAMES[0]]["timestamps"]
    target_timestamp = sample_timestamps[-1] + 100.0  # 100 steps ahead

    expected_returns, cov_matrix = calculate_expected_returns_and_cov(
        raw_data_dict, predictor, target_timestamp, ASSET_NAMES, verbose=False
    )
    data_kwargs = {"expected_returns": expected_returns, "cov_matrix": cov_matrix}

    # 2. Generate Reference Front (2D) using ECM
    print("Generating Reference Front using Epsilon-Constrained Method...")
    ecm = EpsilonConstrainedOptimizer()
    ref_metrics, _ = ecm.generate_pareto_front(100, verbose=False, **data_kwargs)
    ref_front_2d = np.column_stack([ref_metrics["Return"], ref_metrics["Risk"]])

    # 3. Define Variants
    variants = {
        "Standard MOEA/D": MOEADOptimizer,
        "MOEA/D-DRA": MOEADDRAOptimizer,
        "MOEA/D-AWA": MOEADAWAOptimizer,
    }

    if METRIC_TYPE == "hypervolume":
        metric_func = calculate_hypervolume
        metric_name = "Hypervolume"
    else:
        metric_func = calculate_igd
        metric_name = "IGD"

    results = {}
    summary_data = []  # one row per variant for the summary CSV

    for name, opt_class in variants.items():
        print(f"Evaluating {name}...")
        res = _evaluate_variant(
            name, opt_class, data_kwargs, ref_front_2d, metric_func,
            num_runs, pop_size, gens,
        )
        results[name] = res
        summary_data.append({
            "Variant": name,
            f"Avg {metric_name}": res["avg_metric"],
            f"Std {metric_name}": res["std_metric"],
            "Avg Time (s)": res["avg_time"],
            "Std Time (s)": res["std_time"],
        })

    # 4. Generate Visualizations
    print("\nGenerating comparative visualizations...")

    # Convergence Plot
    histories_dict = {
        name: (res["history_mean"], res["history_std"])
        for name, res in results.items()
    }
    plot_variants_convergence(
        histories_dict,
        metric_name=metric_name,
        save_path=os.path.join(run_folder, "convergence_comparison.png"),
    )

    # Pareto Front Comparison
    variants_fronts = {name: res["metrics"] for name, res in results.items()}
    plot_variants_comparison(
        ref_metrics,
        variants_fronts,
        save_path=os.path.join(run_folder, "pareto_comparison.png"),
    )

    # 5. Export Results
    print("Exporting results...")
    df_summary = pd.DataFrame(summary_data)
    df_summary.to_csv(os.path.join(run_folder, "metrics_comparison.csv"), index=False)

    # Export Pareto fronts for each variant
    for name, res in results.items():
        _export_pareto_front(name, res, run_folder)

    print(f"\nExperiment finished successfully. All results are in: {run_folder}")
| 229 | + |
| 230 | + |
# Run the full comparison only when this file is executed as a script,
# so importing the module has no side effects beyond its definitions.
if __name__ == "__main__":
    run_comparative_experiment()
0 commit comments