# calculete.py
# Per-ticker comparison of model predictions ('pred') against a trailing-mean
# baseline ('mean_return'), both scored against the realised 'Return'.
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Symbols processed by main(); collect_ticker_metrics() expects
# 'res/123_<TICKER>pred.csv' and 'data/<TICKER>.csv' to exist for each one.
TICKERS = [
    'IAI', 'IVV', 'ESGU', 'PICK',
    'QUAL', 'SLV', 'IWB', 'HEWJ',
    'RING', 'IAU', 'IYY', 'EWT',
    'ITOT', 'IWV', 'IAK', 'ILCB',
    'DIVB', 'ICVT', 'DGRO', 'IFRA',
]
def load_data(filepath: str) -> pd.DataFrame:
    """Read the CSV at ``filepath`` into a DataFrame using pandas' defaults.

    Args:
        filepath: Location of the CSV; passed straight to ``pd.read_csv``.

    Returns:
        The parsed table, with no further processing applied.
    """
    frame = pd.read_csv(filepath)
    return frame
def calculate_monthly_avg(pred_data: pd.DataFrame) -> pd.DataFrame:
    """Average the ``pred`` and ``Return`` columns per calendar month.

    Args:
        pred_data: Table with a ``date`` column parseable by
            ``pd.to_datetime`` plus ``pred`` and ``Return`` columns.
            The caller's frame is not modified.

    Returns:
        One row per month present in the data, with columns
        ``year_month`` (monthly ``Period``), ``pred`` and ``Return``.
    """
    # assign() works on a fresh copy, so the input frame stays untouched.
    frame = pred_data.assign(date=pd.to_datetime(pred_data['date']))
    frame['year_month'] = frame['date'].dt.to_period('M')

    per_month = frame.groupby('year_month')[['pred', 'Return']].mean()
    return per_month.reset_index()
def calculate_mean_return(real_data: pd.DataFrame, lookback_days: int = 50) -> pd.DataFrame:
    """Compute a trailing-mean baseline of ``Return`` for every month start.

    For each month-start date spanned by the data, the baseline is the mean
    of ``Return`` over the ``lookback_days`` calendar days that end on the
    day before that month start, so only earlier observations are used.

    Args:
        real_data: Table with a ``date`` column parseable by
            ``pd.to_datetime`` and a ``Return`` column. Rows may be in any
            order; the caller's frame is not modified.
        lookback_days: Length of the trailing window in calendar days.
            Defaults to 50.

    Returns:
        DataFrame with columns ``month`` (month-start timestamp),
        ``mean_return`` (NaN when the window holds no observations) and
        ``year_month`` (``'YYYY-MM'`` string). Empty input yields an empty
        frame with the same columns.

    Raises:
        ValueError: If ``lookback_days`` is smaller than 1.
        KeyError: If the ``date`` or ``Return`` column is missing.
    """
    if lookback_days < 1:
        raise ValueError(f'lookback_days must be >= 1, got {lookback_days}')

    indexed = real_data.copy()
    indexed['date'] = pd.to_datetime(indexed['date'])
    # Label slicing with endpoints that are not in the index only works on a
    # monotonic DatetimeIndex, so sort before taking any window.
    indexed = indexed.set_index('date').sort_index()
    returns = indexed['Return']

    if returns.empty:
        # pd.DataFrame([]) would have no 'month' column to post-process.
        return pd.DataFrame(columns=['month', 'mean_return', 'year_month'])

    monthly_starts = returns.resample('MS').first().index
    lookback = pd.Timedelta(days=lookback_days)
    one_day = pd.Timedelta(days=1)

    rows = [
        {
            'month': start_date,
            # Window is [start - lookback, start - 1 day], both inclusive.
            'mean_return': returns.loc[start_date - lookback:start_date - one_day].mean(),
        }
        for start_date in monthly_starts
    ]

    results_df = pd.DataFrame(rows, columns=['month', 'mean_return'])
    results_df['month'] = pd.to_datetime(results_df['month'])
    results_df['year_month'] = results_df['month'].dt.strftime('%Y-%m')
    return results_df
def merge_data(monthly_avg: pd.DataFrame, results_df: pd.DataFrame) -> pd.DataFrame:
    """Attach the baseline ``mean_return`` to the monthly averages.

    Both ``year_month`` columns are converted to strings so they can be
    compared, then a left join keeps every row of ``monthly_avg``.

    Args:
        monthly_avg: Monthly table with a ``year_month`` column.
        results_df: Baseline table with ``year_month`` and ``mean_return``
            columns; any other columns are ignored.

    Returns:
        A new frame with all columns of ``monthly_avg`` (``year_month`` as
        string) plus ``mean_return``, which is NaN for unmatched months.
        Neither input is modified.
    """
    left = monthly_avg.assign(year_month=monthly_avg['year_month'].astype(str))
    right = results_df.assign(year_month=results_df['year_month'].astype(str))

    baseline = right[['year_month', 'mean_return']]
    return left.merge(baseline, how='left', on='year_month')
def calculate_metrics(y_true: pd.Series, y_pred: pd.Series) -> dict:
    """Compute MAE, MSE, RMSE and MAPE between actual and predicted values.

    Inputs are compared position by position (index labels are ignored) so
    that all four metrics are computed over exactly the same pairs.

    Args:
        y_true: Actual values.
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        Dict with keys ``'MAE'``, ``'MSE'``, ``'RMSE'`` and ``'MAPE'``.
        MAPE is a percentage computed only over pairs whose actual value is
        non-zero (the ratio is undefined otherwise); it is NaN when every
        actual value is zero.

    Raises:
        ValueError: Propagated from the scikit-learn metric functions, e.g.
            for inputs of different length.
    """
    # Plain arrays: `Series - Series` would align on index labels, whereas
    # the scikit-learn metrics below pair values by position.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Computed first so scikit-learn validates the inputs before any masking.
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)

    # Skip zero actuals: dividing by them would yield inf/NaN and poison the mean.
    nonzero = actual != 0
    if nonzero.any():
        relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = float('nan')

    return {
        'MAE': mae,
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'MAPE': mape,
    }
def collect_ticker_metrics(ticker: str) -> list:
    """Score the model prediction and the trailing-mean baseline for a ticker.

    Reads ``res/123_<ticker>pred.csv`` (predictions) and
    ``data/<ticker>.csv`` (realised returns), aggregates both to monthly
    values, joins them, and evaluates ``pred`` and ``mean_return`` against
    the monthly mean ``Return``.

    Args:
        ticker: Symbol used to build the two input file paths.

    Returns:
        Eight dicts with keys ``'Ticker'``, ``'Metric'`` and ``'Value'``:
        ``pred_mae``, ``pred_mse``, ``pred_rmse``, ``pred_mape`` followed by
        ``mean_mae``, ``mean_mse``, ``mean_rmse``, ``mean_mape``.

    Raises:
        ValueError: If no month has all of ``Return``, ``pred`` and
            ``mean_return`` available.
    """
    pred_data = load_data(f'res/123_{ticker}pred.csv')
    real_data = load_data(f'data/{ticker}.csv')

    monthly_avg = calculate_monthly_avg(pred_data)
    results_df_ticker = calculate_mean_return(real_data)
    merged_df = merge_data(monthly_avg, results_df_ticker)

    # The left join (and months with no prior history) can leave NaN in
    # 'mean_return', which the metric functions reject. Score both series on
    # the same complete months so the comparison stays like-for-like.
    complete = merged_df.dropna(subset=['Return', 'pred', 'mean_return'])
    if complete.empty:
        raise ValueError(f'No complete monthly rows to score for ticker {ticker!r}')

    scored = {
        'pred': calculate_metrics(complete['Return'], complete['pred']),
        'mean': calculate_metrics(complete['Return'], complete['mean_return']),
    }
    return [
        {'Ticker': ticker, 'Metric': f'{prefix}_{name.lower()}', 'Value': metrics[name]}
        for prefix, metrics in scored.items()
        for name in ('MAE', 'MSE', 'RMSE', 'MAPE')
    ]
def main() -> pd.DataFrame:
    """Gather the metric rows of every ticker in ``TICKERS``.

    Returns:
        Long-format DataFrame built from the dicts returned by
        ``collect_ticker_metrics`` (keys ``Ticker``, ``Metric``, ``Value``),
        in ``TICKERS`` order.
    """
    all_rows = [
        row
        for symbol in TICKERS
        for row in collect_ticker_metrics(symbol)
    ]
    return pd.DataFrame(all_rows)
if __name__ == '__main__':
    # Long format: one row per (Ticker, Metric) pair.
    results_df = main()

    # Wide format: one row per ticker, one column per metric name.
    transposed_df = results_df.pivot(
        index='Ticker',
        columns='Metric',
        values='Value',
    )
    transposed_df.to_csv('Mean vs Pred.csv')