Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 85 additions & 1 deletion bigframes/extensions/pandas/dataframe_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import cast
from typing import cast, Iterable, Optional

import pandas
import pandas.api.extensions
Expand All @@ -21,6 +21,83 @@
import bigframes.pandas as bpd


class AIAccessor:
"""
Pandas DataFrame accessor for BigQuery AI functions.
"""

def __init__(self, pandas_obj: pandas.DataFrame) -> None:
    """Store the pandas DataFrame that this accessor operates on."""
    # Kept as ``_obj``; ``forecast`` uploads this frame via ``session.read_pandas``.
    self._obj = pandas_obj

def forecast(
self,
*,
data_col: str,
timestamp_col: str,
model: str = "TimesFM 2.0",
id_cols: Optional[Iterable[str]] = None,
horizon: int = 10,
confidence_level: float = 0.95,
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jules Missing the output_historical_time_series boolean parameter.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the missing output_historical_time_series boolean parameter and documented it.

context_window: Optional[int] = None,
session=None,
) -> pandas.DataFrame:
"""
Forecast time series at future horizon using BigQuery AI.FORECAST.

The DataFrame is converted to BigFrames by calling ``read_pandas``, then the forecast
is generated using ``bigframes.bigquery.ai.forecast``, and the result is
converted back to a pandas DataFrame using ``to_pandas``.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jules This is an implementation detail that doesn't belong in the docstring.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the implementation details regarding read_pandas and to_pandas from the docstring.


Args:
data_col (str):
A str value that specifies the name of the data column. The data column contains the data to forecast.
The data column must use one of the following data types: INT64, NUMERIC and FLOAT64
timestamp_col (str):
A str value that specifies the name of the time points column.
The time points column provides the time points used to generate the forecast.
The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME
model (str, default "TimesFM 2.0"):
A str value that specifies the name of the model. TimesFM 2.0 is the only supported value, and is the default value.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a link to the standard-sql documentation for ai.forecast and updated the model param documentation to include TimesFM 2.5.

id_cols (Iterable[str], optional):
An iterable of str values that specify the names of one or more ID columns. Each ID identifies a unique time series to forecast.
Specify one or more values for this argument in order to forecast multiple time series using a single query.
The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>
horizon (int, default 10):
An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
confidence_level (float, default 0.95):
A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
The default value is 0.95. The valid input range is [0, 1).
context_window (int, optional):
An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
The context window length determines how many of the most recent data points from the input time series are used by the model.
If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
that is still large enough to cover the number of time series data points in your input data.
session (bigframes.session.Session, optional):
The BigFrames session to use. If not provided, the default global session is used.

Returns:
pandas.DataFrame:
The forecast DataFrame result.
"""
import bigframes.bigquery.ai

if session is None:
session = bf_session.get_global_session()

bf_df = cast(bpd.DataFrame, session.read_pandas(self._obj))
result = bigframes.bigquery.ai.forecast(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious if this triggers our logger.

My gut feeling says yes, because our logger only seems to track the call stack of annotated classes / functions. However, I still feel like to double check with you on this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should trigger the logger, but I think we should probably add labels that are specific to the dataframe accessor. I'll follow-up with that.

bf_df,
data_col=data_col,
timestamp_col=timestamp_col,
model=model,
id_cols=id_cols,
horizon=horizon,
confidence_level=confidence_level,
context_window=context_window,
)
return result.to_pandas(ordered=True)


@pandas.api.extensions.register_dataframe_accessor("bigquery")
class BigQueryDataFrameAccessor:
"""
Expand All @@ -32,6 +109,13 @@ class BigQueryDataFrameAccessor:
def __init__(self, pandas_obj: pandas.DataFrame) -> None:
    """Store the pandas DataFrame that this accessor operates on."""
    # Kept as ``_obj``; the ``ai`` property passes this same frame to ``AIAccessor``.
    self._obj = pandas_obj

@property
def ai(self) -> "AIAccessor":
    """
    BigQuery AI functions for the wrapped DataFrame.

    Returns:
        AIAccessor:
            A new accessor constructed around the same pandas DataFrame
            that this accessor holds.
    """
    wrapped_frame = self._obj
    return AIAccessor(wrapped_frame)

def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
"""
Compute a new pandas Series by applying a SQL scalar function to the DataFrame.
Expand Down
25 changes: 25 additions & 0 deletions tests/system/small/extensions/test_dataframe_accessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pandas as pd
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jules missing the license header.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the missing Apache 2.0 license header to tests/system/small/extensions/test_dataframe_accessor.py.

import pytest

import bigframes.pandas as bpd

def test_pandas_ai_forecast(session):
    """Forecast one step ahead through the pandas accessor and check the output columns."""
    visit_dates = pd.to_datetime(
        ["2017-01-01", "2017-01-02", "2017-01-03", "2017-01-04", "2017-01-05"]
    )
    history = pd.DataFrame(
        {
            "parsed_date": visit_dates,
            "total_visits": [10.0, 20.0, 30.0, 40.0, 50.0],
            "id": ["1", "1", "1", "1", "1"],
        }
    )

    forecasted = history.bigquery.ai.forecast(
        timestamp_col="parsed_date",
        data_col="total_visits",
        horizon=1,
        session=session,
    )

    # Both forecast output columns must be present in the returned frame.
    for expected_column in ("forecast_timestamp", "forecast_value"):
        assert expected_column in forecasted.columns
36 changes: 36 additions & 0 deletions tests/unit/core/compile/sqlglot/test_dataframe_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,39 @@ def to_pandas(series, *, ordered):

session.read_pandas.assert_called_once()
snapshot.assert_match(result, "out.sql")


def test_ai_forecast(snapshot, monkeypatch):
    """Check that ``df.bigquery.ai.forecast`` forwards its arguments unchanged.

    ``bigframes.bigquery.ai.forecast`` is replaced with a stub, so no query is
    issued. The stub echoes the keyword arguments it received through the
    mocked ``to_pandas`` return value, which lets the test compare them
    (including the accessor's defaults) against the expected mapping.

    Args:
        snapshot: Snapshot fixture; accepted for parity with the other tests in
            this module but not used here.
        monkeypatch: pytest fixture used to swap in the ``forecast`` stub.
    """
    import bigframes.bigquery.ai
    import bigframes.session

    session = mock.create_autospec(bigframes.session.Session)
    bf_df = mock.create_autospec(bpd.DataFrame)
    session.read_pandas.return_value = bf_df
    # Created up front so the test can inspect how ``to_pandas`` was called.
    result_df = mock.create_autospec(bpd.DataFrame)

    def mock_ai_forecast(df, **kwargs):
        # The accessor must pass along exactly what ``read_pandas`` returned.
        assert df is bf_df
        result_df.to_pandas.return_value = kwargs
        return result_df

    monkeypatch.setattr(bigframes.bigquery.ai, "forecast", mock_ai_forecast)

    df = pd.DataFrame({"date": ["2020-01-01"], "value": [1.0]})
    result = df.bigquery.ai.forecast(
        timestamp_col="date",
        data_col="value",
        horizon=5,
        session=session,
    )

    session.read_pandas.assert_called_once()
    # Identity check: comparing DataFrames with ``==`` is element-wise, not a bool.
    assert session.read_pandas.call_args.args[0] is df
    # The accessor converts the result back to pandas with ordering preserved.
    result_df.to_pandas.assert_called_once_with(ordered=True)
    assert result == {
        "timestamp_col": "date",
        "data_col": "value",
        "model": "TimesFM 2.0",
        "id_cols": None,
        "horizon": 5,
        "confidence_level": 0.95,
        "context_window": None,
    }
Loading