-
Notifications
You must be signed in to change notification settings - Fork 67
fix: Correct DataFrame widget rendering in Colab #2319
Changes from 2 commits
be1dc28
751c96d
9e61250
fb9c94a
a4b5d69
b516978
63c15dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -789,9 +789,7 @@ def __repr__(self) -> str: | |
|
|
||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
| # anywdiget mode uses the same display logic as the "deferred" mode | ||
| # for faster execution | ||
| if opts.repr_mode in ("deferred", "anywidget"): | ||
| if opts.repr_mode == "deferred": | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # TODO(swast): pass max_columns and get the true column count back. Maybe | ||
|
|
@@ -829,68 +827,143 @@ def __repr__(self) -> str: | |
| lines.append(f"[{row_count} rows x {column_count} columns]") | ||
| return "\n".join(lines) | ||
|
|
||
| def _repr_html_(self) -> str: | ||
| """ | ||
| Returns an html string primarily for use by notebooks for displaying | ||
| a representation of the DataFrame. Displays 20 rows by default since | ||
| many notebooks are not configured for large tables. | ||
| """ | ||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
| if opts.repr_mode == "deferred": | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # Process blob columns first, regardless of display mode | ||
| self._cached() | ||
| df = self.copy() | ||
| def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: | ||
| """Process blob columns for display.""" | ||
| df = self | ||
| blob_cols = [] | ||
| if bigframes.options.display.blob_display: | ||
| blob_cols = [ | ||
| series_name | ||
| for series_name, series in df.items() | ||
| for series_name, series in self.items() | ||
| if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE | ||
| ] | ||
| for col in blob_cols: | ||
| # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. | ||
| df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) | ||
| if blob_cols: | ||
| df = self.copy() | ||
| for col in blob_cols: | ||
| # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. | ||
| df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) | ||
| return df, blob_cols | ||
|
|
||
| def _get_anywidget_bundle(self, include=None, exclude=None): | ||
| """ | ||
| Helper method to create and return the anywidget mimebundle. | ||
| This function encapsulates the logic for anywidget display. | ||
| """ | ||
| from bigframes import display | ||
|
|
||
| # TODO(shuowei): Keep blob_cols and pass them to TableWidget so that they can render properly. | ||
| df, _ = self._get_display_df_and_blob_cols() | ||
|
|
||
| # Create and display the widget | ||
| widget = display.TableWidget(df) | ||
| widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) | ||
|
|
||
| # Handle both tuple (data, metadata) and dict returns | ||
| if isinstance(widget_repr_result, tuple): | ||
| widget_repr, widget_metadata = widget_repr_result | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even though this PR is a large one, the only difference from #2271 is this added metadata part. PR 2271 discards metadata, which causes the widget is not displayed at colab notebook. I modify the method to ensure metadata is preserved and returned. Verified at: screen/AjTEQC8SrSfMqhN |
||
| else: | ||
| blob_cols = [] | ||
| widget_repr = widget_repr_result | ||
| widget_metadata = None | ||
|
||
|
|
||
| if opts.repr_mode == "anywidget": | ||
| try: | ||
| from IPython.display import display as ipython_display | ||
| widget_repr = dict(widget_repr) | ||
|
|
||
| from bigframes import display | ||
| # At this point, we have already executed the query as part of the | ||
| # widget construction. Let's use the information available to render | ||
| # the HTML and plain text versions. | ||
| widget_repr["text/html"] = widget.table_html | ||
|
||
|
|
||
| # Always create a new widget instance for each display call | ||
| # This ensures that each cell gets its own widget and prevents | ||
| # unintended sharing between cells | ||
| widget = display.TableWidget(df.copy()) | ||
| widget_repr["text/plain"] = self._create_text_representation( | ||
| widget._cached_data, widget.row_count | ||
| ) | ||
|
|
||
| if widget_metadata is not None: | ||
| return widget_repr, widget_metadata | ||
| return widget_repr | ||
|
||
|
|
||
| def _create_text_representation( | ||
| self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] | ||
| ) -> str: | ||
| """Create a text representation of the DataFrame.""" | ||
| opts = bigframes.options.display | ||
| with display_options.pandas_repr(opts): | ||
| import pandas.io.formats | ||
|
|
||
| ipython_display(widget) | ||
| return "" # Return empty string since we used display() | ||
| # safe to mutate this, this dict is owned by this code, and does not affect global config | ||
| to_string_kwargs = ( | ||
| pandas.io.formats.format.get_dataframe_repr_params() # type: ignore | ||
| ) | ||
| if not self._has_index: | ||
| to_string_kwargs.update({"index": False}) | ||
|
|
||
| # We add our own dimensions string, so don't want pandas to. | ||
| to_string_kwargs.update({"show_dimensions": False}) | ||
| repr_string = pandas_df.to_string(**to_string_kwargs) | ||
|
|
||
| except (AttributeError, ValueError, ImportError): | ||
| # Fallback if anywidget is not available | ||
| lines = repr_string.split("\n") | ||
|
|
||
| if total_rows is not None and total_rows > len(pandas_df): | ||
| lines.append("...") | ||
|
|
||
| lines.append("") | ||
| column_count = len(self.columns) | ||
| lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") | ||
| return "\n".join(lines) | ||
|
|
||
| def _repr_mimebundle_(self, include=None, exclude=None): | ||
| """ | ||
| Custom display method for IPython/Jupyter environments. | ||
| This is called by IPython's display system when the object is displayed. | ||
| """ | ||
| opts = bigframes.options.display | ||
| # Only handle widget display in anywidget mode | ||
| if opts.repr_mode == "anywidget": | ||
| try: | ||
| return self._get_anywidget_bundle(include=include, exclude=exclude) | ||
|
|
||
| except ImportError: | ||
| # Anywidget is an optional dependency, so warn rather than fail. | ||
| # TODO(shuowei): When Anywidget becomes the default for all repr modes, | ||
| # remove this warning. | ||
| warnings.warn( | ||
| "Anywidget mode is not available. " | ||
| "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " | ||
| f"Falling back to deferred mode. Error: {traceback.format_exc()}" | ||
| f"Falling back to static HTML. Error: {traceback.format_exc()}" | ||
| ) | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # Continue with regular HTML rendering for non-anywidget modes | ||
| # TODO(swast): pass max_columns and get the true column count back. Maybe | ||
| # get 1 more column than we have requested so that pandas can add the | ||
| # ... for us? | ||
| # In non-anywidget mode, fetch data once and use it for both HTML | ||
| # and plain text representations to avoid multiple queries. | ||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
|
|
||
| df, blob_cols = self._get_display_df_and_blob_cols() | ||
|
|
||
| pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( | ||
| max_results | ||
| ) | ||
|
|
||
| self._set_internal_query_job(query_job) | ||
| column_count = len(pandas_df.columns) | ||
|
|
||
| html_string = self._create_html_representation( | ||
| pandas_df, row_count, column_count, blob_cols | ||
| ) | ||
|
|
||
| text_representation = self._create_text_representation(pandas_df, row_count) | ||
|
|
||
| return {"text/html": html_string, "text/plain": text_representation} | ||
|
|
||
| def _create_html_representation( | ||
| self, | ||
| pandas_df: pandas.DataFrame, | ||
| row_count: int, | ||
| column_count: int, | ||
| blob_cols: list[str], | ||
| ) -> str: | ||
| """Create an HTML representation of the DataFrame.""" | ||
| opts = bigframes.options.display | ||
| with display_options.pandas_repr(opts): | ||
| # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas. | ||
| # TODO(shuowei, b/464053870): Escaping HTML would be useful, but | ||
| # `escape=False` is needed to show images. We may need to implement | ||
| # a full-fledged repr module to better support types not in pandas. | ||
| if bigframes.options.display.blob_display and blob_cols: | ||
|
|
||
| def obj_ref_rt_to_html(obj_ref_rt) -> str: | ||
|
|
@@ -919,15 +992,12 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: | |
|
|
||
| # set max_colwidth so not to truncate the image url | ||
| with pandas.option_context("display.max_colwidth", None): | ||
| max_rows = pandas.get_option("display.max_rows") | ||
| max_cols = pandas.get_option("display.max_columns") | ||
| show_dimensions = pandas.get_option("display.show_dimensions") | ||
| html_string = pandas_df.to_html( | ||
| escape=False, | ||
| notebook=True, | ||
| max_rows=max_rows, | ||
| max_cols=max_cols, | ||
| show_dimensions=show_dimensions, | ||
| max_rows=pandas.get_option("display.max_rows"), | ||
| max_cols=pandas.get_option("display.max_columns"), | ||
| show_dimensions=pandas.get_option("display.show_dimensions"), | ||
| formatters=formatters, # type: ignore | ||
| ) | ||
| else: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's add a return type here, which is a tuple with two dictionaries.