Skip to content

Commit 91ce839

Browse files
authored
Merge branch 'main' into shuowei-feat-persist-obj-ref
2 parents 2f67d9c + 3ddd7eb commit 91ce839

File tree

6 files changed

+142
-3
lines changed

6 files changed

+142
-3
lines changed

bigframes/bigquery/_operations/ai.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,7 @@ def forecast(
880880
id_cols: Iterable[str] | None = None,
881881
horizon: int = 10,
882882
confidence_level: float = 0.95,
883+
output_historical_time_series: bool = False,
883884
context_window: int | None = None,
884885
) -> dataframe.DataFrame:
885886
"""
@@ -914,6 +915,15 @@ def forecast(
914915
confidence_level (float, default 0.95):
915916
A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
916917
The default value is 0.95. The valid input range is [0, 1).
918+
output_historical_time_series (bool, default False):
919+
A BOOL value that determines whether the input data is returned
920+
along with the forecasted data. Set this argument to TRUE to return
921+
input data. The default value is FALSE.
922+
923+
Returning the input data along with the forecasted data lets you
924+
compare the historical value of the data column with the forecasted
925+
value of the data column, or chart the change in the data column
926+
values over time.
917927
context_window (int, optional):
918928
An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
919929
The context window length determines how many of the most recent data points from the input time series are use by the model.
@@ -945,6 +955,7 @@ def forecast(
945955
"timestamp_col": timestamp_col,
946956
"model": model,
947957
"horizon": horizon,
958+
"output_historical_time_series": output_historical_time_series,
948959
"confidence_level": confidence_level,
949960
}
950961
if id_cols:

bigframes/extensions/pandas/dataframe_accessor.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import cast
15+
from typing import cast, Iterable, Optional
1616

1717
import pandas
1818
import pandas.api.extensions
@@ -21,6 +21,85 @@
2121
import bigframes.pandas as bpd
2222

2323

24+
class AIAccessor:
25+
"""
26+
Pandas DataFrame accessor for BigQuery AI functions.
27+
"""
28+
29+
def __init__(self, pandas_obj: pandas.DataFrame):
30+
self._obj = pandas_obj
31+
32+
def forecast(
33+
self,
34+
*,
35+
data_col: str,
36+
timestamp_col: str,
37+
model: str = "TimesFM 2.0",
38+
id_cols: Optional[Iterable[str]] = None,
39+
horizon: int = 10,
40+
confidence_level: float = 0.95,
41+
context_window: Optional[int] = None,
42+
output_historical_time_series: bool = False,
43+
session=None,
44+
) -> pandas.DataFrame:
45+
"""
46+
Forecast time series at future horizon using BigQuery AI.FORECAST.
47+
48+
See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast
49+
50+
Args:
51+
data_col (str):
52+
A str value that specifies the name of the data column. The data column contains the data to forecast.
53+
The data column must use one of the following data types: INT64, NUMERIC and FLOAT64
54+
timestamp_col (str):
55+
A str value that specified the name of the time points column.
56+
The time points column provides the time points used to generate the forecast.
57+
The time points column must use one of the following data types: TIMESTAMP, DATE and DATETIME
58+
model (str, default "TimesFM 2.0"):
59+
A str value that specifies the name of the model. "TimesFM 2.0" and "TimesFM 2.5" are supported.
60+
id_cols (Iterable[str], optional):
61+
An iterable of str value that specifies the names of one or more ID columns. Each ID identifies a unique time series to forecast.
62+
Specify one or more values for this argument in order to forecast multiple time series using a single query.
63+
The columns that you specify must use one of the following data types: STRING, INT64, ARRAY<STRING> and ARRAY<INT64>
64+
horizon (int, default 10):
65+
An int value that specifies the number of time points to forecast. The default value is 10. The valid input range is [1, 10,000].
66+
confidence_level (float, default 0.95):
67+
A FLOAT64 value that specifies the percentage of the future values that fall in the prediction interval.
68+
The default value is 0.95. The valid input range is [0, 1).
69+
context_window (int, optional):
70+
An int value that specifies the context window length used by BigQuery ML's built-in TimesFM model.
71+
The context window length determines how many of the most recent data points from the input time series are use by the model.
72+
If you don't specify a value, the AI.FORECAST function automatically chooses the smallest possible context window length to use
73+
that is still large enough to cover the number of time series data points in your input data.
74+
output_historical_time_series (bool, default False):
75+
A boolean value that determines whether to include the input time series history in the forecast.
76+
session (bigframes.session.Session, optional):
77+
The BigFrames session to use. If not provided, the default global session is used.
78+
79+
Returns:
80+
pandas.DataFrame:
81+
The forecast DataFrame result.
82+
"""
83+
import bigframes.bigquery.ai
84+
85+
if session is None:
86+
session = bf_session.get_global_session()
87+
88+
bf_df = cast(bpd.DataFrame, session.read_pandas(self._obj))
89+
result = bigframes.bigquery.ai.forecast(
90+
bf_df,
91+
data_col=data_col,
92+
timestamp_col=timestamp_col,
93+
model=model,
94+
id_cols=id_cols,
95+
horizon=horizon,
96+
confidence_level=confidence_level,
97+
context_window=context_window,
98+
output_historical_time_series=output_historical_time_series,
99+
)
100+
return result.to_pandas(ordered=True)
101+
102+
24103
@pandas.api.extensions.register_dataframe_accessor("bigquery")
25104
class BigQueryDataFrameAccessor:
26105
"""
@@ -32,6 +111,13 @@ class BigQueryDataFrameAccessor:
32111
def __init__(self, pandas_obj: pandas.DataFrame):
33112
self._obj = pandas_obj
34113

114+
@property
115+
def ai(self) -> "AIAccessor":
116+
"""
117+
Accessor for BigQuery AI functions.
118+
"""
119+
return AIAccessor(self._obj)
120+
35121
def sql_scalar(self, sql_template: str, *, output_dtype=None, session=None):
36122
"""
37123
Compute a new pandas Series by applying a SQL scalar function to the DataFrame.

tests/system/large/ml/test_forecasting.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def test_arima_plus_model_fit_score(
8888
result,
8989
columns=expected_columns,
9090
index=2 if id_col_name else 1,
91+
col_exact=False,
9192
)
9293

9394
# save, load to ensure configuration was kept

tests/system/large/ml/test_llm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ def test_llm_gemini_score(llm_fine_tune_df_default_index, model_name):
198198
"evaluation_status",
199199
],
200200
index=1,
201+
col_exact=False,
201202
)
202203

203204

@@ -226,6 +227,7 @@ def test_llm_gemini_pro_score_params(llm_fine_tune_df_default_index, model_name)
226227
"label",
227228
"evaluation_status",
228229
],
230+
col_exact=False,
229231
)
230232

231233

tests/system/small/ml/test_forecasting.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ def test_arima_plus_score(
493493
dtype="Float64",
494494
)
495495
pd.testing.assert_frame_equal(
496-
result,
496+
result[expected.columns],
497497
expected,
498498
rtol=0.1,
499499
check_index_type=False,
@@ -594,7 +594,7 @@ def test_arima_plus_score_series(
594594
dtype="Float64",
595595
)
596596
pd.testing.assert_frame_equal(
597-
result,
597+
result[expected.columns],
598598
expected,
599599
rtol=0.1,
600600
check_index_type=False,

tests/unit/core/compile/sqlglot/test_dataframe_accessor.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,42 @@ def to_pandas(series, *, ordered):
4343

4444
session.read_pandas.assert_called_once()
4545
snapshot.assert_match(result, "out.sql")
46+
47+
48+
def test_ai_forecast(snapshot, monkeypatch):
49+
import bigframes.bigquery.ai
50+
import bigframes.session
51+
52+
session = mock.create_autospec(bigframes.session.Session)
53+
bf_df = mock.create_autospec(bpd.DataFrame)
54+
session.read_pandas.return_value = bf_df
55+
56+
def mock_ai_forecast(df, **kwargs):
57+
assert df is bf_df
58+
result_df = mock.create_autospec(bpd.DataFrame)
59+
result_df.to_pandas.return_value = kwargs
60+
return result_df
61+
62+
import bigframes.bigquery.ai
63+
64+
monkeypatch.setattr(bigframes.bigquery.ai, "forecast", mock_ai_forecast)
65+
66+
df = pd.DataFrame({"date": ["2020-01-01"], "value": [1.0]})
67+
result = df.bigquery.ai.forecast(
68+
timestamp_col="date",
69+
data_col="value",
70+
horizon=5,
71+
session=session,
72+
)
73+
74+
session.read_pandas.assert_called_once()
75+
assert result == {
76+
"timestamp_col": "date",
77+
"data_col": "value",
78+
"model": "TimesFM 2.0",
79+
"id_cols": None,
80+
"horizon": 5,
81+
"confidence_level": 0.95,
82+
"context_window": None,
83+
"output_historical_time_series": False,
84+
}

0 commit comments

Comments
 (0)