fix(Datapoints): restore plain identifier column names in returned dataframes (#2603)

haakonvt · web-flow · commit 9e1e9103fdf7 · 2026-05-05T05:06:39.000Z
diff --git a/cognite/client/utils/_pandas_helpers.py b/cognite/client/utils/_pandas_helpers.py
@@ -352,10 +352,10 @@ def _create_multi_index_from_columns(
     include_aggregate: bool,
     include_granularity: bool,
     include_unit: bool,
-) -> pd.MultiIndex:
+) -> pd.Index:
     import pandas as pd
 
-    column_ids_df = pd.DataFrame(
+    column_ids = pd.DataFrame(
         [
             col.as_multi_index_tuple(
                 include_aggregate=include_aggregate,
@@ -368,9 +368,13 @@ def _create_multi_index_from_columns(
     )
     # Key operation is to drop all-nan columns, which in the multi-index translates to dropping
     # the corresponding levels:
-    non_id_levels = column_ids_df.iloc[:, 1:].dropna(axis="columns", how="all").fillna("")
+    non_id_levels = column_ids.iloc[:, 1:].dropna(axis="columns", how="all").fillna("")
+    # When none of the extra levels survive (status, aggregate, granularity, unit), return a plain
+    # Index so that the columns are the bare identifiers rather than 1-tuples:
+    if non_id_levels.columns.empty:
+        return pd.Index(column_ids["identifier"])
     # ...but we always keep the identifier column:
-    return pd.MultiIndex.from_frame(pd.concat((column_ids_df.iloc[:, [0]], non_id_levels), axis=1, copy=False))
+    return pd.MultiIndex.from_frame(pd.concat((column_ids[["identifier"]], non_id_levels), axis=1, copy=False))
 
 
 def _create_timestamp_index(
diff --git a/scripts/test-pyodide.js b/scripts/test-pyodide.js
@@ -46,7 +46,7 @@ server.listen(PORT, () => {
     // stlite bumps to a Pyodide release that ships cryptography>=45.0.1
     // (Pyodide 0.29.0 already does). After the expiry date, the workaround is
     // skipped — if it's still needed the install will fail loudly.
-    if (new Date() < new Date("2026-05-03")) {
+    if (new Date() < new Date("2026-07-04")) {
       await pyodide.loadPackage(["cryptography", "ssl"]);
       await micropip.install("authlib<1.7");
     }
diff --git a/tests/tests_unit/test_api/test_datapoints.py b/tests/tests_unit/test_api/test_datapoints.py
@@ -646,7 +646,7 @@ def test_datapoints_no_names(self) -> None:
 
         d = Datapoints(id=1, is_string=False, is_step=False, type="numeric", timestamp=[1, 2, 3], average=[2, 3, 4])
         expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms"))
-        expected_df.columns = pd.MultiIndex.from_tuples([(1,)], names=["identifier"])
+        expected_df.columns = pd.Index([1], name="identifier")
         pd.testing.assert_frame_equal(expected_df, d.to_pandas(include_aggregate_name=False))
 
         expected_df = pd.DataFrame({1: [2, 3, 4.0]}, index=pd.to_datetime(range(1, 4), unit="ms"))
@@ -676,6 +676,40 @@ def test_id_and_external_id_set_gives_external_id_columns(self) -> None:
         )
         pd.testing.assert_frame_equal(expected_df, d.to_pandas())
 
+    def test_raw_datapoints_external_id_gives_plain_index(self) -> None:
+        import pandas as pd
+
+        d = Datapoints(
+            id=1,
+            external_id="my-ts",
+            is_string=False,
+            is_step=False,
+            type="numeric",
+            timestamp=[1, 2],
+            value=[3.0, 4.0],
+        )
+        df = d.to_pandas()
+        assert isinstance(df.columns, pd.Index) and not isinstance(df.columns, pd.MultiIndex)
+        assert list(df.columns) == ["my-ts"]
+
+    def test_raw_datapoints_with_status_codes_gives_multi_index(self) -> None:
+        import pandas as pd
+
+        d = Datapoints(
+            id=1,
+            external_id="my-ts",
+            is_string=False,
+            is_step=False,
+            type="numeric",
+            timestamp=[1, 2],
+            value=[3.0, 4.0],
+            status_code=[0, 0],
+            status_symbol=["Good", "Good"],
+        )
+        df = d.to_pandas()
+        assert isinstance(df.columns, pd.MultiIndex)
+        assert df.columns.names == ["identifier", "status"]
+
     def test_datapoints_empty(self) -> None:
         d = Datapoints(id=0, is_string=False, is_step=False, type="numeric", external_id="1", timestamp=[], value=[])
         assert d.to_pandas().empty
@@ -729,7 +763,7 @@ def test_datapoints_list_names(self) -> None:
         expected_df = pd.DataFrame({1: [2, 3, 4.0], 2: [1, None, 3]}, index=pd.to_datetime(range(1, 4), unit="ms"))
         expected_df.columns = pd.MultiIndex.from_tuples([(2, "max"), (3, "average")], names=["identifier", "aggregate"])
         pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
-        expected_df.columns = pd.MultiIndex.from_tuples([(2,), (3,)], names=["identifier"])
+        expected_df.columns = pd.Index([2, 3], name="identifier")
         pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(include_aggregate_name=False), check_freq=False)
 
     def test_datapoints_list_names_dup(self) -> None:
@@ -759,7 +793,7 @@ def test_datapoints_list_non_aligned(self) -> None:
             {1: [1, 2, 3, None, None], 2: [None, None, 3, 4, 5]},
             index=pd.to_datetime(range(1, 6), unit="ms"),
         )
-        expected_df.columns = pd.MultiIndex.from_tuples([(1,), (2,)], names=["identifier"])
+        expected_df.columns = pd.Index([1, 2], name="identifier")
         pd.testing.assert_frame_equal(expected_df, dps_list.to_pandas(), check_freq=False)
 
     def test_datapoints_list_empty(self) -> None:
diff --git a/tests/tests_unit/test_data_classes/test_datapoints.py b/tests/tests_unit/test_data_classes/test_datapoints.py
@@ -113,7 +113,5 @@ def test_identifier_priority(self, dps_lst_cls: type[CogniteResourceList]) -> No
             {1: 2.0, 2: 4.0, 3: 6.0},
             index=np.array([1234 * 1_000_000], dtype="datetime64[ns]"),
         )
-        exp_df.columns = pd.MultiIndex.from_tuples(
-            [(123,), ("foo",), (NodeId(space="s", external_id="x"),)], names=["identifier"]
-        )
+        exp_df.columns = pd.Index([123, "foo", NodeId(space="s", external_id="x")], name="identifier")
         pd.testing.assert_frame_equal(df, exp_df)

Original file line number	Diff line number	Diff line change
`@@ -46,7 +46,7 @@ server.listen(PORT, () => {`
`46`	`46`	`// stlite bumps to a Pyodide release that ships cryptography>=45.0.1`
`47`	`47`	`// (Pyodide 0.29.0 already does). After the expiry date, the workaround is`
`48`	`48`	`// skipped — if it's still needed the install will fail loudly.`
`49`		`- if (new Date() < new Date("2026-05-03")) {`
	`49`	`+ if (new Date() < new Date("2026-07-04")) {`
`50`	`50`	`await pyodide.loadPackage(["cryptography", "ssl"]);`
`51`	`51`	`await micropip.install("authlib<1.7");`
`52`	`52`	`}`
Original file line number	Diff line number	Diff line change
`@@ -113,7 +113,5 @@ def test_identifier_priority(self, dps_lst_cls: type[CogniteResourceList]) -> No`
`113`	`113`	`{1: 2.0, 2: 4.0, 3: 6.0},`
`114`	`114`	`index=np.array([1234 * 1_000_000], dtype="datetime64[ns]"),`
`115`	`115`	`)`
`116`		`- exp_df.columns = pd.MultiIndex.from_tuples(`
`117`		`- [(123,), ("foo",), (NodeId(space="s", external_id="x"),)], names=["identifier"]`
`118`		`- )`
	`116`	`+ exp_df.columns = pd.Index([123, "foo", NodeId(space="s", external_id="x")], name="identifier")`
`119`	`117`	`pd.testing.assert_frame_equal(df, exp_df)`