# Review summary of this patch. These lines sit before the first "diff --git"
# header and are not part of any hunk (presumably skipped as leading text by
# patch tooling -- confirm with the tool you apply this with).
#
# bigframes/pandas/core/methods/describe.py
#   * describe(): the describe_block.stack(override_labels=stack_cols) call and
#     the following .droplevel(level=0) now run only when `stack_cols` is not
#     empty. When it is empty the function returns
#     dataframe.DataFrame(describe_block) without stacking or dropping a level.
#   * _get_aggs_for_dtype(): adds a branch that returns [aggregations.count_op]
#     when dtypes.is_json_like(dtype) is true or dtype == dtypes.OBJ_REF_DTYPE.
#     The final `else` still returns [].
#
# tests/system/small/pandas/test_describe.py
#   * Adds `import bigframes.pandas as bpd`.
#   * test_describe_json_and_obj_ref_returns_count: reads one row with a
#     PARSE_JSON column and a URI column, derives `obj_ref_col` through
#     .str.to_blob(), drops the URI column, and asserts that
#     describe(include="all") has a "count" row equal to 1.0 for both columns.
#   * test_describe_with_unsupported_type_returns_empty_dataframe: describes a
#     single ST_GEOGPOINT column and asserts 0 columns and an index of length 1.
#   * test_describe_empty_dataframe_returns_empty_dataframe: describes
#     bpd.DataFrame() and asserts 0 columns and an index of length 1.
#
# NOTE(review): the hunk headers declare multi-line hunks (for example
# "@@ -56,9 +56,10 @@" and "@@ -352,3 +354,40 @@"), but the patch text below
# occupies two physical lines. The line structure looks collapsed -- confirm
# against the original commit before trying to apply it.
# NOTE(review): test_describe_empty_dataframe_returns_empty_dataframe takes the
# `session` fixture but its body only calls bpd.DataFrame() -- confirm whether
# the fixture is needed or whether the frame should be bound to that session.
diff --git a/bigframes/pandas/core/methods/describe.py b/bigframes/pandas/core/methods/describe.py index 6fd7960daf3..34c116ba27d 100644 --- a/bigframes/pandas/core/methods/describe.py +++ b/bigframes/pandas/core/methods/describe.py @@ -56,9 +56,10 @@ def describe( "max", ] ).intersection(describe_block.column_labels.get_level_values(-1)) - describe_block = describe_block.stack(override_labels=stack_cols) - - return dataframe.DataFrame(describe_block).droplevel(level=0) + if not stack_cols.empty: + describe_block = describe_block.stack(override_labels=stack_cols) + return dataframe.DataFrame(describe_block).droplevel(level=0) + return dataframe.DataFrame(describe_block) def _describe( @@ -120,5 +121,7 @@ def _get_aggs_for_dtype(dtype) -> list[aggregations.UnaryAggregateOp]: dtypes.TIME_DTYPE, ]: return [aggregations.count_op, aggregations.nunique_op] + elif dtypes.is_json_like(dtype) or dtype == dtypes.OBJ_REF_DTYPE: + return [aggregations.count_op] else: return [] diff --git a/tests/system/small/pandas/test_describe.py b/tests/system/small/pandas/test_describe.py index 6f288115128..b8e427c10ea 100644 --- a/tests/system/small/pandas/test_describe.py +++ b/tests/system/small/pandas/test_describe.py @@ -15,6 +15,8 @@ import pandas.testing import pytest +import bigframes.pandas as bpd + def test_df_describe_non_temporal(scalars_dfs): # TODO: supply a reason why this isn't compatible with pandas 1.x @@ -352,3 +354,40 @@ def test_series_groupby_describe(scalars_dfs): check_dtype=False, check_index_type=False, ) + + +def test_describe_json_and_obj_ref_returns_count(session): + # Test describe() works on JSON and OBJ_REF types (without nunique, which fails) + sql = """ + SELECT + PARSE_JSON('{"a": 1}') AS json_col, + 'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col + """ + df = session.read_gbq(sql) + + df["obj_ref_col"] = df["uri_col"].str.to_blob() + df = df.drop(columns=["uri_col"]) + + res = df.describe(include="all").to_pandas() + + assert "count" in 
res.index + assert res.loc["count", "json_col"] == 1.0 + assert res.loc["count", "obj_ref_col"] == 1.0 + + +def test_describe_with_unsupported_type_returns_empty_dataframe(session): + df = session.read_gbq("SELECT ST_GEOGPOINT(1.0, 2.0) AS geo_col") + + res = df.describe().to_pandas() + + assert len(res.columns) == 0 + assert len(res.index) == 1 + + +def test_describe_empty_dataframe_returns_empty_dataframe(session): + df = bpd.DataFrame() + + res = df.describe().to_pandas() + + assert len(res.columns) == 0 + assert len(res.index) == 1