Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions eval_data/ohlcv_sample.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
date,symbol,open,high,low,close,volume
2020-01-01,AAPL,75,76,74,75.5,1000000
2020-01-02,AAPL,75.5,77,75,76.8,1200000
2020-01-03,AAPL,76.8,78,76,77.5,1100000
2020-01-04,AAPL,77.5,79,77,78.2,1300000
2020-01-05,AAPL,78.2,80,78,79.5,1250000
21 changes: 21 additions & 0 deletions examples/data_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pandas as pd

# Read the sample OHLCV bars, converting the "date" column to timestamps.
df = pd.read_csv(
    "eval_data/ohlcv_sample.csv",
    parse_dates=["date"],
)

def split_data(df, train_size=3, test_size=1, step_size=1):
    """Build rolling walk-forward ``(train, test)`` windows over *df*.

    Each window takes ``train_size`` consecutive rows for training and the
    ``test_size`` rows that immediately follow for testing, so the test rows
    always come positionally after the train rows. Consecutive windows start
    ``step_size`` rows apart.

    Args:
        df: Frame sliced positionally with ``iloc``; rows are assumed to be
            in chronological order already.
        train_size: Number of rows in each train window (must be > 0).
        test_size: Number of rows in each test window (must be > 0).
        step_size: Row offset between consecutive window starts (must be > 0).

    Returns:
        List of ``(train, test)`` tuples; empty when *df* has fewer than
        ``train_size + test_size`` rows.

    Raises:
        ValueError: If any size argument is not strictly positive.
    """
    # Non-positive sizes would silently yield empty or overlapping windows.
    for name, value in (
        ("train_size", train_size),
        ("test_size", test_size),
        ("step_size", step_size),
    ):
        if value <= 0:
            raise ValueError(f"{name} must be greater than 0")

    window = train_size + test_size
    return [
        (
            df.iloc[start:start + train_size],
            df.iloc[start + train_size:start + window],
        )
        for start in range(0, len(df) - window + 1, step_size)
    ]

splits = split_data(df)

# Show the date/close columns of every train/test pair, one split per section.
shown_columns = ["date", "close"]
for split_no, (train_part, test_part) in enumerate(splits):
    print(f"Split {split_no}")
    for heading, part in (("Train:", train_part), ("Test:", test_part)):
        print(heading)
        print(part[shown_columns])
    print("-" * 20)
11 changes: 11 additions & 0 deletions examples/leaky_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pandas as pd

# Load the sample OHLCV bars with "date" parsed into timestamps.
df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

# Intentionally bad: each row's signal is derived from the very close-to-close
# ratio that the same row then collects as its strategy return, so the trade
# is only "decided" once its outcome is already known.
previous_close = df["close"].shift(1)
df["past_return"] = df["close"] / previous_close

# Rows whose ratio is above 1 are flagged, and only those keep their ratio.
df["signal"] = df["past_return"] > 1
df["strategy_return"] = df["signal"] * df["past_return"]

report_columns = [
    "date",
    "symbol",
    "close",
    "past_return",
    "signal",
    "strategy_return",
]
print(df[report_columns])
28 changes: 28 additions & 0 deletions examples/metrics_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd

# Load the sample OHLCV bars with "date" parsed into timestamps.
df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

# Close-to-close price ratio (current close / previous close); the first row
# has no previous close and is therefore NaN.
df["past_return"] = df["close"] / df["close"].shift(1)
df["signal"] = df["past_return"] > 1

fee_rate = 0.001
slippage_rate = 0.0005

# A trade is a flip of the 0/1 position. The first row has nothing to diff
# against, so it counts as a trade only when the strategy starts invested.
position = df["signal"].astype(int)
df["trade"] = position.diff().abs().fillna(position)

# "past_return" is a ratio, so subtract 1 to get a simple return before
# netting fractional costs; otherwise invested rows sit near 1.0 while flat
# rows sit at 0 and the metrics below mix the two scales.
df["gross_return"] = df["signal"] * (df["past_return"] - 1)
df["cost"] = df["trade"] * (fee_rate + slippage_rate)
df["net_return"] = (df["gross_return"] - df["cost"]).fillna(0)

net_return = df["net_return"]
total_return = net_return.sum()
num_trades = int(df["trade"].sum())

# Drawdown is the largest peak-to-trough drop of the cumulative return curve
# (additive, to stay consistent with total_return being a plain sum), not of
# the individual per-row returns.
cumulative_return = net_return.cumsum()
max_drawdown = (cumulative_return.cummax() - cumulative_return).max()

# Evaluate std once; a zero or NaN std (fewer than two rows) reports 0.
volatility = net_return.std()
sharpe = net_return.mean() / volatility if volatility > 0 else 0

metrics = {
    "total_return": total_return,
    "sharpe": sharpe,
    "max_drawdown": max_drawdown,
    "num_trades": num_trades,
}

print(metrics)
43 changes: 43 additions & 0 deletions examples/safe_optimizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd

# Read the sample OHLCV bars, converting the "date" column to timestamps.
df = pd.read_csv(
    "eval_data/ohlcv_sample.csv",
    parse_dates=["date"],
)

# Ratio of each close to the previous row's close; the first row has no
# predecessor, so its value is NaN.
previous_close = df["close"].shift(1)
df["past_return"] = df["close"] / previous_close

def run_strategy(data, threshold):
    """Score the threshold rule on *data*.

    A row counts as "in the market" when its ``past_return`` is strictly
    greater than *threshold*; the score is the sum of ``past_return`` over
    those rows. Rows with a missing ``past_return`` contribute 0. The caller's
    frame is left unmodified.
    """
    past_return = data["past_return"]
    in_market = past_return > threshold
    return (in_market * past_return).fillna(0).sum()

def split_data(df, train_size=3, test_size=1, step_size=1):
    """Build rolling walk-forward ``(train, test)`` windows over *df*.

    Each window takes ``train_size`` consecutive rows for training and the
    ``test_size`` rows that immediately follow for testing, so the test rows
    always come positionally after the train rows. Consecutive windows start
    ``step_size`` rows apart.

    Args:
        df: Frame sliced positionally with ``iloc``; rows are assumed to be
            in chronological order already.
        train_size: Number of rows in each train window (must be > 0).
        test_size: Number of rows in each test window (must be > 0).
        step_size: Row offset between consecutive window starts (must be > 0).

    Returns:
        List of ``(train, test)`` tuples; empty when *df* has fewer than
        ``train_size + test_size`` rows.

    Raises:
        ValueError: If any size argument is not strictly positive.
    """
    # Non-positive sizes would silently yield empty or overlapping windows.
    for name, value in (
        ("train_size", train_size),
        ("test_size", test_size),
        ("step_size", step_size),
    ):
        if value <= 0:
            raise ValueError(f"{name} must be greater than 0")

    window = train_size + test_size
    return [
        (
            df.iloc[start:start + train_size],
            df.iloc[start + train_size:start + window],
        )
        for start in range(0, len(df) - window + 1, step_size)
    ]

thresholds = [1.005, 1.01, 1.015]
results = []

for split_id, (train, test) in enumerate(split_data(df)):
    # Score every candidate on the train window only.
    train_scores = {
        candidate: run_strategy(train, candidate) for candidate in thresholds
    }

    # max() keeps the first maximum, so ties go to the earliest candidate.
    best_threshold = max(train_scores, key=train_scores.get)

    # The test window is scored once, with the threshold picked on train.
    results.append(
        dict(
            split=split_id,
            best_threshold=best_threshold,
            train_score=train_scores[best_threshold],
            test_score=run_strategy(test, best_threshold),
        )
    )

results_df = pd.DataFrame(results)

print(results_df)
16 changes: 16 additions & 0 deletions examples/trading_costs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pandas as pd

# Load the sample OHLCV bars with "date" parsed into timestamps.
df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

# Close-to-close price ratio (current close / previous close); the first row
# has no previous close and is therefore NaN.
df["past_return"] = df["close"] / df["close"].shift(1)
df["signal"] = df["past_return"] > 1

fee_rate = 0.001  # 0.1% fee
slippage_rate = 0.0005  # 0.05% slippage

# A trade is a flip of the 0/1 position. The first row has nothing to diff
# against, so it counts as a trade only when the strategy starts invested.
position = df["signal"].astype(int)
df["trade"] = position.diff().abs().fillna(position)

# "past_return" is a ratio, so subtract 1 to get a simple return before the
# fractional cost is deducted; subtracting the cost from the raw ratio would
# mix ~1.0 values on invested rows with 0/negative values on flat rows.
df["gross_return"] = df["signal"] * (df["past_return"] - 1)
df["cost"] = df["trade"] * (fee_rate + slippage_rate)
df["net_return"] = df["gross_return"] - df["cost"]

print(df[["date", "close", "signal", "trade", "gross_return", "cost", "net_return"]])
28 changes: 28 additions & 0 deletions examples/walk_forward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd

# Load the sample OHLCV bars with "date" parsed into timestamps.
df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

# Ratio of each close to the previous row's close (NaN on the first row).
previous_close = df["close"].shift(1)
df["past_return"] = df["close"] / previous_close

# parameters
train_size = 3
test_size = 1

results = []

window = train_size + test_size
for start in range(len(df) - window + 1):
    split_at = start + train_size

    # simple rule learned from train: the mean ratio of the train rows
    threshold = df["past_return"].iloc[start:split_at].mean()

    # apply that rule to the rows right after the train window; assign()
    # returns a new frame, so the source frame is not written to
    held_out = df.iloc[split_at:start + window]
    signal = held_out["past_return"] > threshold
    results.append(
        held_out.assign(
            signal=signal,
            strategy_return=signal * held_out["past_return"],
        )
    )

final = pd.concat(results)

print(final[["date", "close", "past_return", "signal", "strategy_return"]])
13 changes: 13 additions & 0 deletions tests/test_leakage_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd

def test_no_future_data_used():
    """Check that ``past_return`` on a row never depends on later rows."""
    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

    # SAFE logic (past only)
    df["past_return"] = df["close"] / df["close"].shift(1)

    # The first row has no previous close, so a backward-looking ratio is NaN.
    assert pd.isna(df["past_return"].iloc[0])

    # Causality check: rewriting the final close may only change the final
    # row. A feature that peeked ahead would change earlier rows as well.
    tampered_close = df["close"].astype(float)
    tampered_close.iloc[-1] = tampered_close.iloc[-1] * 2
    tampered_return = tampered_close / tampered_close.shift(1)
    pd.testing.assert_series_equal(
        tampered_return.iloc[:-1],
        df["past_return"].iloc[:-1],
        check_names=False,
    )

    # Ensure no use of future data
    assert "future_return" not in df.columns
21 changes: 21 additions & 0 deletions tests/test_metrics_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pandas as pd

def test_metrics_exist():
    """Build a metrics dict from the sample data and check its four keys."""
    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

    previous_close = df["close"].shift(1)
    df["past_return"] = df["close"] / previous_close
    df["signal"] = df["past_return"] > 1

    # Position flips count as trades; the first row falls back to the
    # position itself because it has no previous row to diff against.
    position = df["signal"].astype(int)
    df["trade"] = position.diff().abs().fillna(position)
    df["net_return"] = df["past_return"].fillna(0)

    metrics = dict(
        total_return=df["net_return"].sum(),
        sharpe=0,
        max_drawdown=0,
        num_trades=int(df["trade"].sum()),
    )

    for key in ("total_return", "sharpe", "max_drawdown", "num_trades"):
        assert key in metrics
47 changes: 47 additions & 0 deletions tests/test_portfolio_walk_forward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest
import pandas as pd
import vectorbt as vbt


def test_portfolio_walk_forward_exists():
    """A portfolio built with ``from_holding`` exposes ``walk_forward``."""
    prices = pd.Series([1, 2, 3, 4, 5])
    portfolio = vbt.Portfolio.from_holding(prices)

    assert hasattr(portfolio, "walk_forward")


def test_portfolio_walk_forward_returns_dataframe():
    """``walk_forward`` returns a DataFrame carrying the expected columns."""
    prices = pd.Series([1, 2, 3, 4, 5])
    portfolio = vbt.Portfolio.from_holding(prices)

    result = portfolio.walk_forward(train_size=2, test_size=1)

    assert isinstance(result, pd.DataFrame)
    for column in ("train_start", "test_start", "train_metric", "test_metric"):
        assert column in result.columns


def test_portfolio_walk_forward_no_overlap():
    """Every split's train window ends strictly before its test window starts."""
    prices = pd.Series([1, 2, 3, 4, 5])
    portfolio = vbt.Portfolio.from_holding(prices)

    result = portfolio.walk_forward(train_size=2, test_size=1)

    # One named tuple per split; attribute names mirror the column names.
    for split in result.itertuples(index=False):
        assert split.train_end < split.test_start


def test_portfolio_walk_forward_invalid_sizes():
    """A zero train, test or step size is rejected with ``ValueError``."""
    portfolio = vbt.Portfolio.from_holding(pd.Series([1, 2, 3, 4, 5]))

    invalid_calls = (
        dict(train_size=0, test_size=1),
        dict(train_size=2, test_size=0),
        dict(train_size=2, test_size=1, step_size=0),
    )
    for kwargs in invalid_calls:
        with pytest.raises(ValueError):
            portfolio.walk_forward(**kwargs)
17 changes: 17 additions & 0 deletions tests/test_safe_optimizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd

def test_optimizer_uses_train_before_test():
    """The first train slice ends before the first test slice begins."""
    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
    previous_close = df["close"].shift(1)
    df["past_return"] = df["close"] / previous_close

    # Same positional slices the optimizer's first split uses by default.
    train, test = df.iloc[:3], df.iloc[3:4]

    assert test.index.min() > train.index.max()

def test_optimizer_does_not_use_future_return():
    """The feature frame has no ``future_return`` column and starts with NaN."""
    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
    previous_close = df["close"].shift(1)
    df["past_return"] = df["close"] / previous_close

    assert "future_return" not in df.columns
    # The first row has no previous close, so its ratio must be missing.
    first_value = df["past_return"].iloc[0]
    assert pd.isna(first_value)
17 changes: 17 additions & 0 deletions tests/test_walk_forward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pandas as pd

def test_walk_forward_no_leakage():
    """The test slice of the first walk-forward split starts after the train slice."""
    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

    previous_close = df["close"].shift(1)
    df["past_return"] = df["close"] / previous_close

    train, test = df.iloc[:3], df.iloc[3:4]

    # The threshold comes from train rows only and is then applied to test.
    threshold = train["past_return"].mean()
    test = test.assign(signal=test["past_return"] > threshold)

    # ensure test does not use future data
    assert train.index.max() < test.index.min()
16 changes: 16 additions & 0 deletions vectorbt/examples/leaky_strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pandas as pd

# Load the sample OHLCV bars with "date" parsed into timestamps.
df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])

# BAD on purpose: shift(-1) pulls the NEXT row's close onto the current row,
# so this ratio depends on a price that is not yet known on that row
# (intentional look-ahead leakage).
next_close = df["close"].shift(-1)
df["future_return"] = next_close / df["close"]

# Both the signal and the payoff are taken from that future value (cheating).
df["signal"] = df["future_return"] > 1
df["strategy_return"] = df["signal"] * df["future_return"]

print("Leaky strategy output:")
output_columns = [
    "date",
    "symbol",
    "close",
    "future_return",
    "signal",
    "strategy_return",
]
print(df[output_columns])
54 changes: 54 additions & 0 deletions vectorbt/portfolio/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1610,7 +1610,61 @@ def indexing_func(self: PortfolioT, pd_indexing_func: tp.PandasIndexingFunc, **k
init_cash=new_init_cash,
call_seq=new_call_seq,
)
def walk_forward(
    self,
    train_size: int,
    test_size: int,
    step_size: int = 1,
    metric: str = "total_return",
    agg_func=None,
) -> pd.DataFrame:
    """Run simple walk-forward analysis on this portfolio.

    Splits the portfolio time index into rolling train/test windows.
    Train windows always come before test windows to avoid lookahead leakage.

    Args:
        train_size: Number of index positions in each train window (> 0).
        test_size: Number of index positions in each test window (> 0).
        step_size: Positions between consecutive window starts (> 0).
        metric: Statistic computed from `self.returns()` on each window:
            "total_return" compounds the window's returns,
            "mean_return" averages them.
        agg_func: Optional callable applied to each window's metric value
            (for example to reduce per-column values to a single number).

    Returns:
        DataFrame with one row per split and the columns `split`,
        `train_start`, `train_end`, `test_start`, `test_end`, `train_metric`
        and `test_metric`. The start/end values are index labels and both
        ends are inclusive. When the index is too short for a single split
        the frame is empty but still has these columns.

    Raises:
        ValueError: If a size is not strictly positive or `metric` is unknown.
    """

    if train_size <= 0:
        raise ValueError("train_size must be greater than 0")
    if test_size <= 0:
        raise ValueError("test_size must be greater than 0")
    if step_size <= 0:
        raise ValueError("step_size must be greater than 0")

    # Dispatch on the requested metric instead of silently ignoring it.
    metric_funcs = {
        "total_return": lambda window_returns: (1 + window_returns).prod() - 1,
        "mean_return": lambda window_returns: window_returns.mean(),
    }
    if metric not in metric_funcs:
        raise ValueError(
            f"metric must be one of {sorted(metric_funcs)}, got {metric!r}"
        )
    metric_func = metric_funcs[metric]

    index = self.wrapper.index
    # Loop-invariant: compute the returns once rather than once per split.
    returns = self.returns()

    columns = [
        "split",
        "train_start",
        "train_end",
        "test_start",
        "test_end",
        "train_metric",
        "test_metric",
    ]
    results = []

    window = train_size + test_size
    for train_start in range(0, len(index) - window + 1, step_size):
        # The test window begins exactly where the train window stops.
        train_end = train_start + train_size
        test_end = train_end + test_size

        train_metric = metric_func(returns.iloc[train_start:train_end])
        test_metric = metric_func(returns.iloc[train_end:test_end])
        if agg_func is not None:
            train_metric = agg_func(train_metric)
            test_metric = agg_func(test_metric)

        results.append(
            dict(
                split=len(results),
                train_start=index[train_start],
                train_end=index[train_end - 1],
                test_start=index[train_end],
                test_end=index[test_end - 1],
                train_metric=train_metric,
                test_metric=test_metric,
            )
        )

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(results, columns=columns)

# ############# Class methods ############# #

@classmethod
Expand Down