Skip to content
19 changes: 14 additions & 5 deletions flaml/automl/time_series/ts_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,13 @@ def prettify_prediction(self, y_pred: Union[pd.DataFrame, pd.Series, np.ndarray]

else:
if isinstance(y_pred, np.ndarray):
raise ValueError("Can't enrich np.ndarray as self.test_data is None")
y_pred = pd.DataFrame(data=y_pred, columns=self.target_names)
elif isinstance(y_pred, pd.Series):
assert len(self.target_names) == 1, "Not enough columns in y_pred"
y_pred = pd.DataFrame({self.target_names[0]: y_pred})
# TODO auto-create the timestamps for the time column instead of throwing
raise NotImplementedError("Need a non-None test_data for this to work, for now")
if self.time_col not in y_pred.columns:
forward_frame = create_forward_frame(self.frequency, len(y_pred), self.end_date, self.time_col)
y_pred[self.time_col] = forward_frame[self.time_col].values

assert isinstance(y_pred, pd.DataFrame)
assert self.time_col in y_pred.columns
Expand Down Expand Up @@ -499,10 +500,18 @@ def fit_transform(self, X: Union[DataFrame, np.array], y):
def create_forward_frame(
frequency: str,
steps: int,
test_end_date: datetime.datetime,
last_timestamp: datetime.datetime,
time_col: str,
):
start_date = test_end_date + pd.Timedelta(1, frequency)
if frequency is None:
raise ValueError("frequency cannot be None")
if last_timestamp is None or pd.isna(last_timestamp):
raise ValueError(f"last_timestamp cannot be None or NaT, got {last_timestamp!r}")
try:
offset = pd.tseries.frequencies.to_offset(frequency)
except ValueError as e:
raise ValueError(f"Invalid frequency {frequency!r}; expected a pandas offset alias.") from e
start_date = last_timestamp + offset
times = pd.date_range(
start=start_date,
periods=steps,
Expand Down
63 changes: 63 additions & 0 deletions test/automl/test_ts_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
import pandas as pd

from flaml.automl.time_series.ts_data import TimeSeriesDataset, create_forward_frame


def test_prettify_prediction_generates_timestamps_without_test_data():
    """Check that ``prettify_prediction`` fills in the time column on its own.

    The dataset is built from four daily observations (2020-01-01 through
    2020-01-04) and nothing else. Two predicted values are then passed in
    three different container types -- a DataFrame with an unrelated integer
    index, a Series, and a 1-D ndarray. Each result must be a DataFrame whose
    ``ds`` column holds 2020-01-05 and 2020-01-06 and whose ``y`` column keeps
    the predicted values unchanged.
    """
    history = pd.DataFrame(
        {
            "ds": pd.date_range("2020-01-01", periods=4, freq="D"),
            "y": [1.0, 2.0, 3.0, 4.0],
        }
    )
    dataset = TimeSeriesDataset(history, time_col="ds", target_names="y")

    # The two days that directly follow the last training timestamp.
    expected_ds = pd.Series(pd.date_range("2020-01-05", periods=2, freq="D"), name="ds")

    raw_predictions = [
        pd.DataFrame({"y": [5.0, 6.0]}, index=[10, 11]),
        pd.Series([5.0, 6.0]),
        np.array([5.0, 6.0]),
    ]
    for raw in raw_predictions:
        pretty = dataset.prettify_prediction(raw)
        assert isinstance(pretty, pd.DataFrame)
        # Only the timestamp values matter here; the index of the result is ignored.
        pd.testing.assert_series_equal(pretty["ds"], expected_ds, check_index=False)
        assert pretty["y"].tolist() == [5.0, 6.0]


def test_prettify_prediction_generates_monthly_timestamps_without_test_data():
    """Check generated timestamps for month-start training data.

    The training frame holds four month-start dates (2020-01-01 through
    2020-04-01). Two predictions passed as a DataFrame without a time column
    are expected to come back stamped 2020-05-01 and 2020-06-01, with the
    ``y`` values untouched.
    """
    month_starts = pd.date_range("2020-01-01", periods=4, freq="MS")
    history = pd.DataFrame({"ds": month_starts, "y": [1.0, 2.0, 3.0, 4.0]})
    dataset = TimeSeriesDataset(history, time_col="ds", target_names="y")

    raw = pd.DataFrame({"y": [5.0, 6.0]})
    pretty = dataset.prettify_prediction(raw)

    # The next two month starts after the final training timestamp.
    expected_ds = pd.Series(pd.date_range("2020-05-01", periods=2, freq="MS"), name="ds")
    pd.testing.assert_series_equal(pretty["ds"], expected_ds, check_index=False)
    assert pretty["y"].tolist() == [5.0, 6.0]


def test_create_forward_frame_uses_next_frequency_offset():
    """Check that ``create_forward_frame`` starts one frequency step after the anchor.

    Two anchored frequencies are exercised:

    * weekly on Sunday, from 2020-01-05 -> expected to start at 2020-01-12;
    * quarter-end (December year end), from 2020-03-31 -> expected to start
      at 2020-06-30.

    In both cases two steps are requested and the ``ds`` column is compared
    with the equivalent ``pd.date_range``.
    """
    # Prefer the "QE-DEC" alias and fall back to "Q-DEC" when the installed
    # pandas rejects it, so the test runs with either spelling.
    quarterly_alias = "QE-DEC"
    try:
        pd.tseries.frequencies.to_offset(quarterly_alias)
    except ValueError:
        quarterly_alias = "Q-DEC"

    weekly_frame = create_forward_frame("W-SUN", 2, pd.Timestamp("2020-01-05"), "ds")
    quarterly_frame = create_forward_frame(quarterly_alias, 2, pd.Timestamp("2020-03-31"), "ds")

    expected_weekly = pd.Series(pd.date_range("2020-01-12", periods=2, freq="W-SUN"), name="ds")
    pd.testing.assert_series_equal(weekly_frame["ds"], expected_weekly)

    expected_quarterly = pd.Series(pd.date_range("2020-06-30", periods=2, freq=quarterly_alias), name="ds")
    pd.testing.assert_series_equal(quarterly_frame["ds"], expected_quarterly)
Loading