polakowo · kothurisindhu2000 · May 4, 2026 · May 4, 2026 · May 5, 2026
diff --git a/eval_data/ohlcv_sample.csv b/eval_data/ohlcv_sample.csv
@@ -0,0 +1,6 @@
+date,symbol,open,high,low,close,volume
+2020-01-01,AAPL,75,76,74,75.5,1000000
+2020-01-02,AAPL,75.5,77,75,76.8,1200000
+2020-01-03,AAPL,76.8,78,76,77.5,1100000
+2020-01-04,AAPL,77.5,79,77,78.2,1300000
+2020-01-05,AAPL,78.2,80,78,79.5,1250000
diff --git a/examples/data_split.py b/examples/data_split.py
@@ -0,0 +1,39 @@
+import pandas as pd
+
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+
+def split_data(df, train_size=3, test_size=1):
+    """Build rolling ``(train, test)`` window pairs over the rows of ``df``.
+
+    The window start advances one row at a time. Each test window begins on
+    the row right after its train window ends, so the two never overlap.
+    Returns an empty list when ``df`` has fewer than
+    ``train_size + test_size`` rows.
+
+    Raises:
+        ValueError: If ``train_size`` or ``test_size`` is not positive.
+    """
+    if train_size <= 0:
+        raise ValueError("train_size must be greater than 0")
+    if test_size <= 0:
+        raise ValueError("test_size must be greater than 0")
+
+    splits = []
+    for start in range(len(df) - train_size - test_size + 1):
+        test_start = start + train_size
+        train = df.iloc[start:test_start]
+        test = df.iloc[test_start:test_start + test_size]
+        splits.append((train, test))
+    return splits
+
+
+splits = split_data(df)
+
+for i, (train, test) in enumerate(splits):
+    print(f"Split {i}")
+    print("Train:")
+    print(train[["date", "close"]])
+    print("Test:")
+    print(test[["date", "close"]])
+    print("-" * 20)
diff --git a/examples/leaky_strategy.py b/examples/leaky_strategy.py
@@ -0,0 +1,11 @@
+import pandas as pd
+
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+# Intentionally bad: uses tomorrow's close today (NaN on the last row, which has no tomorrow).
+df["future_return"] = df["close"].shift(-1) / df["close"]
+
+df["signal"] = df["future_return"] > 1
+df["strategy_return"] = df["signal"] * df["future_return"]
+
+print(df[["date", "symbol", "close", "future_return", "signal", "strategy_return"]])
diff --git a/examples/metrics_report.py b/examples/metrics_report.py
@@ -0,0 +1,41 @@
+import pandas as pd
+
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+# Simple one-period return: close_t / close_{t-1} - 1 (NaN on the first row).
+df["past_return"] = df["close"] / df["close"].shift(1) - 1
+df["signal"] = df["past_return"] > 0
+
+# The signal on a row needs that row's close, so it can only be traded on the
+# next row; acting on it in the same row would be lookahead.
+df["position"] = df["signal"].shift(1, fill_value=False)
+
+fee_rate = 0.001
+slippage_rate = 0.0005
+
+# A trade is any change in position; the first row counts if it opens one.
+exposure = df["position"].astype(int)
+df["trade"] = exposure.diff().abs().fillna(exposure)
+df["gross_return"] = exposure * df["past_return"]
+df["cost"] = df["trade"] * (fee_rate + slippage_rate)
+df["net_return"] = (df["gross_return"] - df["cost"]).fillna(0)
+
+# Compound per-period net returns into an equity curve that starts at 1.
+equity = (1 + df["net_return"]).cumprod()
+
+total_return = equity.iloc[-1] - 1
+num_trades = int(df["trade"].sum())
+# Drawdown is the fractional drop of equity below its running peak.
+max_drawdown = (1 - equity / equity.cummax()).max()
+# Per-period (not annualized) Sharpe ratio; 0 when returns do not vary.
+net_std = df["net_return"].std()
+sharpe = df["net_return"].mean() / net_std if net_std != 0 else 0
+
+metrics = {
+    "total_return": total_return,
+    "sharpe": sharpe,
+    "max_drawdown": max_drawdown,
+    "num_trades": num_trades
+}
+
+print(metrics)
diff --git a/examples/safe_optimizer.py b/examples/safe_optimizer.py
@@ -0,0 +1,57 @@
+import pandas as pd
+
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+df["past_return"] = df["close"] / df["close"].shift(1)
+# The previous row's ratio is the newest information available when a row's
+# position is chosen. It is lagged on the full frame so that the first row of
+# each split still sees the history that precedes it.
+df["prev_return"] = df["past_return"].shift(1)
+
+
+def run_strategy(data, threshold):
+    """Score a threshold rule on ``data``.
+
+    A position is held on rows whose ``prev_return`` exceeds ``threshold``.
+    The score is the sum of the simple returns (``past_return - 1``) earned
+    on those rows; rows with a missing value contribute 0.
+    """
+    held = data["prev_return"] > threshold
+    earned = held * (data["past_return"] - 1)
+    return earned.fillna(0).sum()
+
+
+def split_data(df, train_size=3, test_size=1):
+    """Return rolling ``(train, test)`` pairs; test rows always follow train rows."""
+    splits = []
+    for start in range(len(df) - train_size - test_size + 1):
+        test_start = start + train_size
+        train = df.iloc[start:test_start]
+        test = df.iloc[test_start:test_start + test_size]
+        splits.append((train, test))
+    return splits
+
+
+thresholds = [1.005, 1.01, 1.015]
+results = []
+
+for split_id, (train, test) in enumerate(split_data(df)):
+    # Pick the threshold using the train window only ...
+    train_scores = {}
+    for threshold in thresholds:
+        train_scores[threshold] = run_strategy(train, threshold)
+    best_threshold = max(train_scores, key=train_scores.get)
+
+    # ... then score that single choice on the unseen test window.
+    test_score = run_strategy(test, best_threshold)
+
+    results.append({
+        "split": split_id,
+        "best_threshold": best_threshold,
+        "train_score": train_scores[best_threshold],
+        "test_score": test_score
+    })
+
+results_df = pd.DataFrame(results)
+
+print(results_df)
diff --git a/examples/trading_costs.py b/examples/trading_costs.py
@@ -0,0 +1,23 @@
+import pandas as pd
+
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+# Simple one-period return: close_t / close_{t-1} - 1 (NaN on the first row).
+df["past_return"] = df["close"] / df["close"].shift(1) - 1
+df["signal"] = df["past_return"] > 0
+
+# The signal on a row needs that row's close, so it can only be traded on the
+# next row; acting on it in the same row would be lookahead.
+df["position"] = df["signal"].shift(1, fill_value=False)
+
+fee_rate = 0.001      # 0.1% fee
+slippage_rate = 0.0005  # 0.05% slippage
+
+# A trade is any change in position; the first row counts if it opens one.
+exposure = df["position"].astype(int)
+df["trade"] = exposure.diff().abs().fillna(exposure)
+df["gross_return"] = exposure * df["past_return"]
+df["cost"] = df["trade"] * (fee_rate + slippage_rate)
+df["net_return"] = df["gross_return"] - df["cost"]
+
+print(df[["date", "close", "signal", "position", "trade", "gross_return", "cost", "net_return"]])
diff --git a/examples/walk_forward.py b/examples/walk_forward.py
@@ -0,0 +1,32 @@
+import pandas as pd
+
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+df["past_return"] = df["close"] / df["close"].shift(1)
+# The previous row's ratio is the newest information available when a row's
+# position is chosen; it is lagged on the full frame, before any splitting.
+df["prev_return"] = df["past_return"].shift(1)
+
+# parameters
+train_size = 3
+test_size = 1
+
+results = []
+
+for start in range(0, len(df) - train_size - test_size + 1):
+    train = df.iloc[start:start + train_size]
+    test = df.iloc[start + train_size:start + train_size + test_size]
+
+    # simple rule learned from train
+    threshold = train["past_return"].mean()
+
+    test = test.copy()
+    # Decide from the previous row's ratio, then earn the current row's simple return.
+    test["signal"] = test["prev_return"] > threshold
+    test["strategy_return"] = test["signal"] * (test["past_return"] - 1)
+
+    results.append(test)
+
+final = pd.concat(results)
+
+print(final[["date", "close", "past_return", "prev_return", "signal", "strategy_return"]])
diff --git a/tests/test_leakage_detection.py b/tests/test_leakage_detection.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+
+def _past_return(frame):
+    """Close divided by the previous row's close (past data only)."""
+    return frame["close"] / frame["close"].shift(1)
+
+
+def test_no_future_data_used():
+    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+    # SAFE logic (past only)
+    df["past_return"] = _past_return(df)
+
+    # The first row has no previous close, so its value must be NaN.
+    assert pd.isna(df["past_return"].iloc[0])
+
+    # Ensure no use of future data
+    assert "future_return" not in df.columns
+
+    # A causal feature keeps the same values when later rows are dropped;
+    # any dependence on future rows would change them.
+    for cutoff in range(1, len(df) + 1):
+        truncated = _past_return(df.iloc[:cutoff])
+        expected = df["past_return"].iloc[:cutoff]
+        pd.testing.assert_series_equal(truncated, expected, check_names=False)
diff --git a/tests/test_metrics_report.py b/tests/test_metrics_report.py
@@ -0,0 +1,24 @@
+import pandas as pd
+
+
+def test_metrics_exist():
+    """Smoke-check that the metrics dict built here carries every expected key."""
+    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+    ratio = df["close"] / df["close"].shift(1)
+    df["past_return"] = ratio
+    df["signal"] = ratio > 1
+
+    in_market = df["signal"].astype(int)
+    df["trade"] = in_market.diff().abs().fillna(in_market)
+    df["net_return"] = ratio.fillna(0)
+
+    metrics = dict(
+        total_return=df["net_return"].sum(),
+        sharpe=0,
+        max_drawdown=0,
+        num_trades=int(df["trade"].sum()),
+    )
+
+    for key in ("total_return", "sharpe", "max_drawdown", "num_trades"):
+        assert key in metrics
diff --git a/tests/test_portfolio_walk_forward.py b/tests/test_portfolio_walk_forward.py
@@ -0,0 +1,49 @@
+import pytest
+import pandas as pd
+import vectorbt as vbt
+
+
+def _make_portfolio():
+    """Build the five-bar buy-and-hold portfolio shared by every test below."""
+    close = pd.Series([1, 2, 3, 4, 5])
+    return vbt.Portfolio.from_holding(close)
+
+
+def test_portfolio_walk_forward_exists():
+    assert hasattr(_make_portfolio(), "walk_forward")
+
+
+def test_portfolio_walk_forward_returns_dataframe():
+    result = _make_portfolio().walk_forward(train_size=2, test_size=1)
+
+    assert isinstance(result, pd.DataFrame)
+    expected_columns = (
+        "train_start", "train_end", "test_start", "test_end",
+        "train_metric", "test_metric",
+    )
+    for column in expected_columns:
+        assert column in result.columns
+
+
+def test_portfolio_walk_forward_no_overlap():
+    result = _make_portfolio().walk_forward(train_size=2, test_size=1)
+
+    # Five bars with train_size=2 and test_size=1 give window starts 0, 1 and 2.
+    # Without this check an empty result would let the loop below pass vacuously.
+    assert len(result) == 3
+
+    for _, row in result.iterrows():
+        assert row["train_end"] < row["test_start"]
+
+
+def test_portfolio_walk_forward_invalid_sizes():
+    pf = _make_portfolio()
+
+    with pytest.raises(ValueError):
+        pf.walk_forward(train_size=0, test_size=1)
+
+    with pytest.raises(ValueError):
+        pf.walk_forward(train_size=2, test_size=0)
+
+    with pytest.raises(ValueError):
+        pf.walk_forward(train_size=2, test_size=1, step_size=0)
diff --git a/tests/test_safe_optimizer.py b/tests/test_safe_optimizer.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+def _load_with_past_return():
+    """Load the sample OHLCV data and attach the backward-looking return ratio."""
+    frame = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+    frame["past_return"] = frame["close"] / frame["close"].shift(1)
+    return frame
+
+
+def test_optimizer_uses_train_before_test():
+    df = _load_with_past_return()
+
+    train, test = df.iloc[:3], df.iloc[3:4]
+
+    # Every train row must sit strictly before the first test row.
+    assert train.index.max() < test.index.min()
+
+
+def test_optimizer_does_not_use_future_return():
+    df = _load_with_past_return()
+
+    assert "future_return" not in df.columns
+    # Row 0 has no previous close, so a backward-looking ratio is NaN there.
+    assert pd.isna(df["past_return"].iloc[0])
diff --git a/tests/test_walk_forward.py b/tests/test_walk_forward.py
@@ -0,0 +1,17 @@
+import pandas as pd
+
+
+def test_walk_forward_no_leakage():
+    """The single test row must come strictly after every train row."""
+    df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+    df["past_return"] = df["close"] / df["close"].shift(1)
+
+    train_rows = df.iloc[:3]
+    test_rows = df.iloc[3:4].copy()
+
+    # The cut-off is derived from the train rows alone.
+    cutoff = train_rows["past_return"].mean()
+    test_rows["signal"] = test_rows["past_return"] > cutoff
+
+    # ensure test does not use future data
+    assert train_rows.index.max() < test_rows.index.min()
diff --git a/vectorbt/examples/leaky_strategy.py b/vectorbt/examples/leaky_strategy.py
@@ -0,0 +1,16 @@
+import pandas as pd
+
+# Read the sample OHLCV rows, parsing the date column as timestamps
+df = pd.read_csv("eval_data/ohlcv_sample.csv", parse_dates=["date"])
+
+# ❌ BAD (intentional leakage): each row is given the NEXT row's close
+next_close = df["close"].shift(-1)
+df["future_return"] = next_close / df["close"]
+
+# Cheating signal: go long whenever the still-unknown next close is higher,
+# and collect that same future ratio as the strategy's return
+df["signal"] = df["future_return"].gt(1)
+df["strategy_return"] = df["signal"] * df["future_return"]
+
+print("Leaky strategy output:")
+print(df[["date", "symbol", "close", "future_return", "signal", "strategy_return"]])
diff --git a/vectorbt/portfolio/base.py b/vectorbt/portfolio/base.py
@@ -1610,7 +1610,95 @@ def indexing_func(self: PortfolioT, pd_indexing_func: tp.PandasIndexingFunc, **k
             init_cash=new_init_cash,
             call_seq=new_call_seq,
         )
+
+    def walk_forward(
+        self,
+        train_size: int,
+        test_size: int,
+        step_size: int = 1,
+        metric: str = "total_return",
+        agg_func=None,
+    ) -> pd.DataFrame:
+        """Run simple walk-forward analysis on this portfolio.
+
+        Splits the portfolio time index into rolling train/test windows.
+        Train windows always come before test windows to avoid lookahead leakage.
+
+        Args:
+            train_size: Number of rows in each train window.
+            test_size: Number of rows in each test window.
+            step_size: Number of rows between consecutive window starts.
+            metric: Statistic computed from each window's returns:
+                `"total_return"` compounds them (`prod(1 + r) - 1`) and
+                `"mean"` takes their arithmetic mean.
+            agg_func: Optional callable applied to every train and test metric
+                before it is stored.
+
+        Returns:
+            DataFrame with one row per split and the columns `split`,
+            `train_start`, `train_end`, `test_start`, `test_end`, `train_metric`
+            and `test_metric`. It has no rows when the index is shorter than
+            `train_size + test_size`.
+
+        Raises:
+            ValueError: If `train_size`, `test_size` or `step_size` is not
+                positive, or if `metric` is not supported.
+        """
+        if train_size <= 0:
+            raise ValueError("train_size must be greater than 0")
+        if test_size <= 0:
+            raise ValueError("test_size must be greater than 0")
+        if step_size <= 0:
+            raise ValueError("step_size must be greater than 0")
+
+        metric_funcs = {
+            "total_return": lambda r: (1 + r).prod() - 1,
+            "mean": lambda r: r.mean(),
+        }
+        if metric not in metric_funcs:
+            raise ValueError("metric must be one of {}".format(sorted(metric_funcs)))
+        metric_func = metric_funcs[metric]
+
+        index = self.wrapper.index
+        # The returns do not depend on the split, so compute them only once.
+        returns = self.returns()
+        last_start = len(index) - train_size - test_size
+
+        results = []
+        for train_start in range(0, last_start + 1, step_size):
+            test_start = train_start + train_size
+            test_end = test_start + test_size
+
+            train_metric = metric_func(returns.iloc[train_start:test_start])
+            test_metric = metric_func(returns.iloc[test_start:test_end])
+            if agg_func is not None:
+                train_metric = agg_func(train_metric)
+                test_metric = agg_func(test_metric)
+
+            results.append(
+                dict(
+                    split=len(results),
+                    train_start=index[train_start],
+                    train_end=index[test_start - 1],
+                    test_start=index[test_start],
+                    test_end=index[test_end - 1],
+                    train_metric=train_metric,
+                    test_metric=test_metric,
+                )
+            )
+
+        # Naming the columns keeps the schema stable when no split fits.
+        columns = [
+            "split",
+            "train_start",
+            "train_end",
+            "test_start",
+            "test_end",
+            "train_metric",
+            "test_metric",
+        ]
+        return pd.DataFrame(results, columns=columns)
 
     # ############# Class methods ############# #
 
     @classmethod