Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit f03758d

Browse files
committed
fix: support schema creation for an empty MultiIndex
1 parent 494a0a1 commit f03758d

File tree

2 files changed

+55
-5
lines changed

2 files changed

+55
-5
lines changed

bigframes/core/blocks.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3457,19 +3457,44 @@ def _pd_index_to_array_value(
34573457
Create an ArrayValue from a list of label tuples.
34583458
The last column will be row offsets.
34593459
"""
3460+
id_gen = bigframes.core.identifiers.standard_id_strings()
3461+
col_ids = [next(id_gen) for _ in range(index.nlevels)]
3462+
offset_id = next(id_gen)
3463+
34603464
rows = []
34613465
labels_as_tuples = utils.index_as_tuples(index)
34623466
for row_offset in range(len(index)):
3463-
id_gen = bigframes.core.identifiers.standard_id_strings()
34643467
row_label = labels_as_tuples[row_offset]
34653468
row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
34663469
row = {}
3467-
for label_part, id in zip(row_label, id_gen):
3468-
row[id] = label_part if pd.notnull(label_part) else None
3469-
row[next(id_gen)] = row_offset
3470+
for label_part, col_id in zip(row_label, col_ids):
3471+
row[col_id] = label_part if pd.notnull(label_part) else None
3472+
row[offset_id] = row_offset
34703473
rows.append(row)
34713474

3472-
return core.ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=session)
3475+
if not rows:
3476+
dtypes_list = getattr(index, "dtypes", None)
3477+
if dtypes_list is None:
3478+
dtypes_list = (
3479+
[index.dtype]
3480+
if hasattr(index, "dtype")
3481+
else [pd.Float64Dtype()] * index.nlevels
3482+
)
3483+
fields = []
3484+
for col_id, dtype in zip(col_ids, dtypes_list):
3485+
try:
3486+
pa_type = bigframes.dtypes.bigframes_dtype_to_arrow_dtype(dtype)
3487+
except TypeError:
3488+
pa_type = pa.string()
3489+
fields.append(pa.field(col_id, pa_type))
3490+
fields.append(pa.field(offset_id, pa.int64()))
3491+
schema = pa.schema(fields)
3492+
pt = pa.Table.from_pylist([], schema=schema)
3493+
else:
3494+
pt = pa.Table.from_pylist(rows)
3495+
pt = pt.rename_columns([*col_ids, offset_id])
3496+
3497+
return core.ArrayValue.from_pyarrow(pt, session=session)
34733498

34743499

34753500
def _resolve_index_col(

tests/system/small/test_multiindex.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,3 +1490,28 @@ def test_multiindex_eq_const(scalars_df_index, scalars_pandas_df_index):
14901490
bigframes.testing.utils.assert_index_equal(
14911491
pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas()
14921492
)
1493+
1494+
1495+
def test_count_empty_multiindex_columns(session):
1496+
df = pandas.DataFrame(
1497+
[], index=[1, 2], columns=pandas.MultiIndex.from_tuples([], names=["a", "b"])
1498+
)
1499+
bdf = session.read_pandas(df)
1500+
1501+
# count() operation unpivots columns, triggering the empty MultiIndex bug internally
1502+
count_df = bdf.count().to_pandas()
1503+
assert count_df.shape == (0,)
1504+
assert count_df.index.nlevels == 2
1505+
assert list(count_df.index.names) == ["a", "b"]
1506+
1507+
1508+
def test_count_preserves_multiindex_columns(session):
1509+
# Tests that `melt` operations via count do not cause MultiIndex drops in Arrow
1510+
df = pandas.DataFrame({"A": [1], "B": ["string"], "C": [3]})
1511+
df.columns = pandas.MultiIndex.from_tuples(
1512+
[("Group1", "A"), ("Group2", "B"), ("Group1", "C")]
1513+
)
1514+
bdf = session.read_pandas(df)
1515+
1516+
count_df = bdf.count().to_pandas()
1517+
assert count_df.shape[0] == 3

0 commit comments

Comments (0)