Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit f03758d

Browse files
committed
fix: support schema creation for an empty MultiIndex
1 parent 494a0a1 commit f03758d

File tree

2 files changed

+55
-5
lines changed

2 files changed

+55
-5
lines changed

bigframes/core/blocks.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3457,19 +3457,44 @@ def _pd_index_to_array_value(
34573457
Create an ArrayValue from a list of label tuples.
34583458
The last column will be row offsets.
34593459
"""
3460+
id_gen = bigframes.core.identifiers.standard_id_strings()
3461+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3462+
offset_id = next(id_gen)
3463+
34603464
rows = []
34613465
labels_as_tuples = utils.index_as_tuples(index)
34623466
for row_offset in range(len(index)):
3463-
id_gen = bigframes.core.identifiers.standard_id_strings()
34643467
row_label = labels_as_tuples[row_offset]
34653468
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34663469
row = {}
3467-
for label_part, id in zip(row_label, id_gen):
3468-
row[id] = label_part if pd.notnull(label_part) else None
3469-
row[next(id_gen)] = row_offset
3470+
for label_part, col_id in zip(row_label, col_ids):
3471+
row[col_id] = label_part if pd.notnull(label_part) else None
3472+
row[offset_id] = row_offset
34703473
rows.append(row)
34713474

3472-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3475+
if not rows:
3476+
dtypes_list = getattr(index, "dtypes", None)
3477+
if dtypes_list is None:
3478+
dtypes_list = (
3479+
[index.dtype]
3480+
if hasattr(index, "dtype")
3481+
else [pd.Float64Dtype()] * index.nlevels
3482+
)
3483+
fields = []
3484+
for col_id, dtype in zip(col_ids, dtypes_list):
3485+
try:
3486+
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
3487+
except TypeError:
3488+
pa_type = pa.string()
3489+
fields.append(pa.field(col_id, pa_type))
3490+
fields.append(pa.field(offset_id, pa.int64()))
3491+
schema = pa.schema(fields)
3492+
pt = pa.Table.from_pylist([], schema=schema)
3493+
else:
3494+
pt = pa.Table.from_pylist(rows)
3495+
pt = pt.rename_columns([*col_ids, offset_id])
3496+
3497+
return core.ArrayValue.from_pyarrow(pt, session=session)
34733498

34743499

34753500
def _resolve_index_col(

tests/system/small/test_multiindex.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,3 +1490,28 @@ def test_multiindex_eq_const(scalars_df_index, scalars_pandas_df_index):
14901490
bigframes.testing.utils.assert_index_equal(
14911491
pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas()
14921492
)
1493+
1494+
1495+
def test_count_empty_multiindex_columns(session):
1496+
df = pandas.DataFrame(
1497+
[], index=[1, 2], columns=pandas.MultiIndex.from_tuples([], names=["a", "b"])
1498+
)
1499+
bdf = session.read_pandas(df)
1500+
1501+
# count() operation unpivots columns, triggering the empty MultiIndex bug internally
1502+
count_df = bdf.count().to_pandas()
1503+
assert count_df.shape == (0,)
1504+
assert count_df.index.nlevels == 2
1505+
assert list(count_df.index.names) == ["a", "b"]
1506+
1507+
1508+
def test_count_preserves_multiindex_columns(session):
1509+
# Tests that `melt` operations via count do not cause MultiIndex drops in Arrow
1510+
df = pandas.DataFrame({"A": [1], "B": ["string"], "C": [3]})
1511+
df.columns = pandas.MultiIndex.from_tuples(
1512+
[("Group1", "A"), ("Group2", "B"), ("Group1", "C")]
1513+
)
1514+
bdf = session.read_pandas(df)
1515+
1516+
count_df = bdf.count().to_pandas()
1517+
assert count_df.shape[0] == 3

0 commit comments

Comments (0)