Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion flaml/automl/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,12 @@ def _eval_estimator(
logger.warning(f"ValueError {e} happened in `metric_loss_score`, set `val_loss` to `np.inf`")
metric_for_logging = {"pred_time": pred_time}
if log_training_metric:
train_pred_y = get_y_pred(estimator, X_train, eval_metric, task)
# For time series forecasting, X_train may be a sampled dataset whose
# test partition can be empty. Use the training partition from X_val
# (which is the dataset used to define y_train above) to keep shapes
# aligned and avoid empty prediction inputs.
X_train_for_metric = X_val.X_train if isinstance(X_val, TimeSeriesDataset) else X_train
train_pred_y = get_y_pred(estimator, X_train_for_metric, eval_metric, task)
metric_for_logging["train_loss"] = metric_loss_score(
eval_metric,
train_pred_y,
Expand Down
3 changes: 2 additions & 1 deletion flaml/automl/time_series/tcn.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,8 @@ def fit(self, X_train: TimeSeriesDataset, y_train=None, budget=None, **kwargs):
def predict(self, X):
X = self.enrich(X)
if isinstance(X, TimeSeriesDataset):
df = X.X_val
# Use X_train if X_val is empty (e.g., when computing training metrics)
df = X.X_val if len(X.test_data) > 0 else X.X_train
else:
df = X
dataset = DataframeDataset(
Expand Down
6 changes: 5 additions & 1 deletion flaml/automl/time_series/tft.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,11 @@ def predict(self, X):
last_data_cols = self.group_ids.copy()
last_data_cols.append(self.target_names[0])
last_data = self.data[lambda x: x.time_idx == x.time_idx.max()][last_data_cols]
decoder_data = X.X_val if isinstance(X, TimeSeriesDataset) else X
# Use X_train if test_data is empty (e.g., when computing training metrics)
if isinstance(X, TimeSeriesDataset):
decoder_data = X.X_val if len(X.test_data) > 0 else X.X_train
else:
decoder_data = X
if "time_idx" not in decoder_data:
decoder_data = add_time_idx_col(decoder_data)
decoder_data["time_idx"] += encoder_data["time_idx"].max() + 1 - decoder_data["time_idx"].min()
Expand Down
33 changes: 30 additions & 3 deletions flaml/automl/time_series/ts_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,13 @@ def predict(self, X: Union[TimeSeriesDataset, DataFrame], **kwargs):

elif isinstance(X, TimeSeriesDataset):
data = X
X = data.test_data[[self.time_col] + X.regressors]
# By default we predict on the dataset's test partition.
# Some internal call paths (e.g., training-metric logging) may pass a
# dataset whose test partition is empty; fall back to train partition.
if data.test_data is not None and len(data.test_data):
X = data.test_data[data.regressors + [data.time_col]]
else:
X = data.train_data[data.regressors + [data.time_col]]

if self._model is not None:
forecast = self._model.predict(X, **kwargs)
Expand Down Expand Up @@ -301,7 +307,13 @@ def predict(self, X, **kwargs):

if isinstance(X, TimeSeriesDataset):
data = X
X = data.test_data[data.regressors + [data.time_col]]
# By default we predict on the dataset's test partition.
# Some internal call paths (e.g., training-metric logging) may pass a
# dataset whose test partition is empty; fall back to train partition.
if data.test_data is not None and len(data.test_data):
X = data.test_data[data.regressors + [data.time_col]]
else:
X = data.train_data[data.regressors + [data.time_col]]

X = X.rename(columns={self.time_col: "ds"})
if self._model is not None:
Expand All @@ -327,11 +339,19 @@ def predict(self, X, **kwargs) -> pd.Series:

if isinstance(X, TimeSeriesDataset):
data = X
X = data.test_data[data.regressors + [data.time_col]]
# By default we predict on the dataset's test partition.
# Some internal call paths (e.g., training-metric logging) may pass a
# dataset whose test partition is empty; fall back to train partition.
if data.test_data is not None and len(data.test_data):
X = data.test_data[data.regressors + [data.time_col]]
else:
X = data.train_data[data.regressors + [data.time_col]]
else:
X = X[self.regressors + [self.time_col]]

if isinstance(X, DataFrame):
if X.shape[0] == 0:
return pd.Series([], name=self.target_names[0], dtype=float)
start = X[self.time_col].iloc[0]
end = X[self.time_col].iloc[-1]
if len(self.regressors):
Expand Down Expand Up @@ -829,6 +849,13 @@ def predict(self, X, **kwargs):
if isinstance(X, TimeSeriesDataset):
data = X
X = data.test_data
# By default we predict on the dataset's test partition.
# Some internal call paths (e.g., training-metric logging) may pass a
# dataset whose test partition is empty; fall back to train partition.
if data.test_data is not None and len(data.test_data):
X = data.test_data
else:
X = data.train_data

if self._model is not None:
X = X[self.regressors]
Expand Down
46 changes: 45 additions & 1 deletion test/automl/test_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,11 +681,55 @@ def split_by_date(df: pd.DataFrame, dt: datetime.date):
print("yahoo!")


def test_log_training_metric_ts_models():
    """Smoke-test that ``log_training_metric=True`` works with time-series estimators.

    Fits a short AutoML forecast run on the statsmodels CO2 dataset once per
    estimator in ``estimators_to_test`` (filtered to those registered in the
    current environment) and checks that fitting completes without error.
    """
    import statsmodels.api as sm

    from flaml.automl.task.time_series_task import TimeSeriesTask

    # Only exercise estimators actually registered for the "forecast" task in
    # this environment; optional dependencies (e.g. prophet, orbit) may be absent.
    estimators_all = TimeSeriesTask("forecast").estimators.keys()
    estimators_to_test = ["xgboost", "arima", "lassolars", "tcn", "snaive", "prophet", "orbit"]
    estimators = [
        est for est in estimators_to_test if est in estimators_all
    ]  # not all estimators available in current python env
    print(f"Testing estimators: {estimators}")

    # Prepare data: monthly-resampled CO2 series with gaps filled, renamed to
    # the (ds, y) column convention used by the forecast task.
    data = sm.datasets.co2.load_pandas().data["co2"]
    data = data.resample("MS").mean()
    data = data.bfill().ffill()
    data = data.to_frame().reset_index()
    data = data.rename(columns={"index": "ds", "co2": "y"})
    num_samples = data.shape[0]
    time_horizon = 12
    split_idx = num_samples - time_horizon
    df = data[:split_idx]  # train on all but the last `time_horizon` rows

    # Fit each estimator with log_training_metric=True; computing training
    # metrics must not raise for time series models.
    for estimator in estimators:
        print(f"\nTesting {estimator} with log_training_metric=True")
        automl = AutoML()
        settings = {
            "time_budget": 3,
            "metric": "mape",
            "task": "forecast",
            "eval_method": "holdout",
            "label": "y",
            "log_training_metric": True,  # the behavior under test: must not cause errors
            "estimator_list": [estimator],
        }
        automl.fit(dataframe=df, **settings, period=time_horizon, force_cancel=True)
        print(f"  ✅ {estimator} SUCCESS with log_training_metric=True")
        # best_estimator can be empty if the tiny time budget expired before
        # any trial finished; only assert when a model was actually selected.
        if automl.best_estimator:
            assert automl.best_estimator == estimator


if __name__ == "__main__":
    # Manual entry point: uncomment a line to run a single test directly.
    # (The diff residue showed both `test_numpy()` and its commented-out
    # replacement; the merged file keeps only the commented form.)
    # test_forecast_automl(60)
    # test_multivariate_forecast_num(5)
    # test_multivariate_forecast_cat(5)
    # test_numpy()
    # test_forecast_classification(5)
    # test_forecast_panel(5)
    # test_cv_step()
    test_log_training_metric_ts_models()
Loading