-
Notifications
You must be signed in to change notification settings - Fork 69
feat: support nested STRUCT and ARRAY data display in anywidget mode #2359
Changes from 39 commits
f20cde5
19e2c4f
4b68243
8a7609a
ceca74d
63e4a3c
3affd92
c53da80
fa37000
60785f3
0a88b10
f32a53f
3944249
ce59668
41df7b3
e364674
8682d55
159d6a5
68b7fbb
5cfa8d7
0b73c0a
21a5d5c
36a9a37
4d46e3c
0f48f82
a8a39dc
dfe5fec
15bdf54
59c3a2a
6d28d28
09635e6
2de5a3c
fc122a5
9a19966
9886e5f
b2166ed
a34802e
7763818
27ae231
f74f82a
03eba5e
eea0a87
ca19957
4e9eaa4
cb7ae87
fb2d029
d2710c2
83b042d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,7 @@ | |
|
|
||
| import bigframes | ||
| from bigframes._config import display_options, options | ||
| from bigframes.display import plaintext | ||
| from bigframes.display import _flatten, plaintext | ||
| import bigframes.formatting_helpers as formatter | ||
|
|
||
| if typing.TYPE_CHECKING: | ||
|
|
@@ -48,13 +48,17 @@ def render_html( | |
| orderable_columns: list[str] | None = None, | ||
| max_columns: int | None = None, | ||
| ) -> str: | ||
| """Render a pandas DataFrame to HTML with specific styling.""" | ||
| """Render a pandas DataFrame to HTML with specific styling and nested data support.""" | ||
| # Flatten nested data first | ||
| flatten_result = _flatten.flatten_nested_data(dataframe) | ||
| flat_df = flatten_result.dataframe | ||
|
|
||
| orderable_columns = orderable_columns or [] | ||
| classes = "dataframe table table-striped table-hover" | ||
| table_html_parts = [f'<table border="1" class="{classes}" id="{table_id}">'] | ||
|
|
||
| # Handle column truncation | ||
| columns = list(dataframe.columns) | ||
| columns = list(flat_df.columns) | ||
| if max_columns is not None and max_columns > 0 and len(columns) > max_columns: | ||
| half = max_columns // 2 | ||
| left_columns = columns[:half] | ||
|
|
@@ -70,11 +74,20 @@ def render_html( | |
|
|
||
| table_html_parts.append( | ||
| _render_table_header( | ||
| dataframe, orderable_columns, left_columns, right_columns, show_ellipsis | ||
| flat_df, orderable_columns, left_columns, right_columns, show_ellipsis | ||
| ) | ||
| ) | ||
| table_html_parts.append( | ||
| _render_table_body(dataframe, left_columns, right_columns, show_ellipsis) | ||
| _render_table_body( | ||
| flat_df, | ||
| flatten_result.row_labels, | ||
| flatten_result.continuation_rows, | ||
| flatten_result.cleared_on_continuation, | ||
| flatten_result.nested_columns, | ||
| left_columns, | ||
| right_columns, | ||
| show_ellipsis, | ||
| ) | ||
| ) | ||
| table_html_parts.append("</table>") | ||
| return "".join(table_html_parts) | ||
|
|
@@ -117,6 +130,10 @@ def render_col_header(col): | |
|
|
||
| def _render_table_body( | ||
| dataframe: pd.DataFrame, | ||
| row_labels: list[str] | None, | ||
| continuation_rows: set[int] | None, | ||
| clear_on_continuation: list[str], | ||
|
Comment on lines
+134
to
+135
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here, add some more explanation to the docstrings. To keep it shorter, you could reference bigframes/display/_flatten.py so that folks can look there for the complete explanation.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. I updated the docstrings to reference bigframes.display._flatten.FlattenResult for the detailed definitions. |
||
| nested_originated_columns: set[str], | ||
| left_columns: list[Any], | ||
| right_columns: list[Any], | ||
| show_ellipsis: bool, | ||
|
|
@@ -126,30 +143,39 @@ def _render_table_body( | |
| precision = options.display.precision | ||
|
|
||
| for i in range(len(dataframe)): | ||
| body_parts.append(" <tr>") | ||
| row_class = "" | ||
| orig_row_idx = None | ||
| is_continuation = False | ||
|
|
||
| if row_labels: | ||
| orig_row_idx = row_labels[i] | ||
|
|
||
| if continuation_rows and i in continuation_rows: | ||
| is_continuation = True | ||
| row_class = "array-continuation" | ||
|
|
||
| if orig_row_idx is not None: | ||
| body_parts.append( | ||
| f' <tr class="{row_class}" data-orig-row="{orig_row_idx}">' | ||
| ) | ||
| else: | ||
| body_parts.append(" <tr>") | ||
|
|
||
| row = dataframe.iloc[i] | ||
|
|
||
| def render_col_cell(col_name): | ||
| value = row[col_name] | ||
| dtype = dataframe.dtypes.loc[col_name] # type: ignore | ||
| align = "right" if _is_dtype_numeric(dtype) else "left" | ||
|
|
||
| # TODO(b/438181139): Consider semi-exploding ARRAY/STRUCT columns | ||
| # into multiple rows/columns like the BQ UI does. | ||
| if pandas.api.types.is_scalar(value) and pd.isna(value): | ||
| body_parts.append( | ||
| f' <td class="cell-align-{align}">' | ||
| '<em class="null-value"><NA></em></td>' | ||
| ) | ||
| else: | ||
| if isinstance(value, float): | ||
| cell_content = f"{value:.{precision}f}" | ||
| else: | ||
| cell_content = str(value) | ||
| body_parts.append( | ||
| f' <td class="cell-align-{align}">' | ||
| f"{html.escape(cell_content)}</td>" | ||
| ) | ||
| cell_html = _render_cell( | ||
| value, | ||
| dtype, | ||
| is_continuation, | ||
| str(col_name), | ||
| clear_on_continuation, | ||
| nested_originated_columns, | ||
| precision, | ||
| ) | ||
| body_parts.append(cell_html) | ||
|
|
||
| for col in left_columns: | ||
| render_col_cell(col) | ||
|
|
@@ -166,6 +192,43 @@ def render_col_cell(col_name): | |
| return "\n".join(body_parts) | ||
|
|
||
|
|
||
| def _render_cell( | ||
| value: Any, | ||
| dtype: Any, | ||
| is_continuation: bool, | ||
| col_name_str: str, | ||
| clear_on_continuation: list[str], | ||
| nested_originated_columns: set[str], | ||
| precision: int, | ||
| ) -> str: | ||
| """Render a single cell of the HTML table.""" | ||
| if is_continuation and col_name_str in clear_on_continuation: | ||
| return " <td></td>" | ||
|
|
||
| if col_name_str in nested_originated_columns: | ||
| align = "left" | ||
| else: | ||
| align = "right" if _is_dtype_numeric(dtype) else "left" | ||
|
|
||
| if pandas.api.types.is_scalar(value) and pd.isna(value): | ||
| if is_continuation: | ||
| # For padding nulls in continuation rows, show empty cell | ||
| return f' <td class="cell-align-{align}"></td>' | ||
| else: | ||
| # For primary nulls, keep showing the <NA> indicator but maybe styled | ||
| return ( | ||
| f' <td class="cell-align-{align}">' | ||
| '<em class="null-value"><NA></em></td>' | ||
| ) | ||
|
|
||
| if isinstance(value, float): | ||
| cell_content = f"{value:.{precision}f}" | ||
| else: | ||
| cell_content = str(value) | ||
|
|
||
| return f' <td class="cell-align-{align}">' f"{html.escape(cell_content)}</td>" | ||
|
|
||
|
|
||
| def _obj_ref_rt_to_html(obj_ref_rt: str) -> str: | ||
| obj_ref_rt_json = json.loads(obj_ref_rt) | ||
| obj_ref_details = obj_ref_rt_json["objectref"]["details"] | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Neat feature! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please create a test_flatten.py file with a few tests that check some of the flattening logic directly without the HTML rendering part. Specifically, let's focus on what happens to index/multiindex columns, as that's my main worry / question.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I created tests/unit/display/test_flatten.py. I moved the logic-specific tests there and added dedicated test cases (test_flatten_preserves_original_index, test_flatten_preserves_multiindex) to verify that indices are correctly preserved and duplicated during the flattening process.