python-bigquery-dataframes/bigframes/display/html.py at 3d3bc2dcd19ae1135ece7320d7da8a9efb8e1060 · googleapis/python-bigquery-dataframes · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""HTML rendering for DataFrames and other objects."""

from __future__ import annotations

import html
import json
import traceback
import typing
from typing import Any, Union
import warnings

import pandas as pd
import pandas.api.types

import bigframes
from bigframes._config import display_options, options
from bigframes.display import plaintext
import bigframes.formatting_helpers as formatter

if typing.TYPE_CHECKING:
    import bigframes.dataframe
    import bigframes.series


def _is_dtype_numeric(dtype: Any) -> bool:
    """Report whether pandas classifies ``dtype`` as numeric.

    The answer is used to decide how a table cell is aligned.
    """
    numeric = pandas.api.types.is_numeric_dtype(dtype)
    return numeric


def render_html(
    *,
    dataframe: pd.DataFrame,
    table_id: str,
    orderable_columns: list[str] | None = None,
    max_columns: int | None = None,
) -> str:
    """Render a pandas DataFrame to an HTML ``<table>`` with specific styling.

    Args:
        dataframe: The data to render.
        table_id: Value of the table's ``id`` attribute. It is HTML-escaped
            before being placed in the markup.
        orderable_columns: Column labels whose header cell gets the
            ``sortable`` class. ``None`` is treated as an empty list.
        max_columns: Maximum number of data columns to show. When the frame
            has more columns than this, the first ``max_columns // 2`` and the
            last ``max_columns - max_columns // 2`` columns are rendered with
            a ``...`` column between them. ``None`` or a non-positive value
            disables truncation.

    Returns:
        The table markup as a single string.
    """
    orderable_columns = orderable_columns or []
    classes = "dataframe table table-striped table-hover"
    # The id lands inside a double-quoted attribute, so quotes and angle
    # brackets must be escaped to keep the markup well-formed.
    safe_table_id = html.escape(str(table_id), quote=True)
    table_html_parts = [
        f'<table border="1" class="{classes}" id="{safe_table_id}">'
    ]

    # Handle column truncation
    columns = list(dataframe.columns)
    if max_columns is not None and max_columns > 0 and len(columns) > max_columns:
        half = max_columns // 2
        left_columns = columns[:half]
        # max_columns >= 1 here, so right_count is always at least 1 and the
        # negative slice below can never degenerate into "all columns".
        right_count = max_columns - half
        right_columns = columns[-right_count:]
        show_ellipsis = True
    else:
        left_columns = columns
        right_columns = []
        show_ellipsis = False

    table_html_parts.append(
        _render_table_header(
            dataframe, orderable_columns, left_columns, right_columns, show_ellipsis
        )
    )
    table_html_parts.append(
        _render_table_body(dataframe, left_columns, right_columns, show_ellipsis)
    )
    table_html_parts.append("</table>")
    return "".join(table_html_parts)


def _render_table_header(
    dataframe: pd.DataFrame,
    orderable_columns: list[str],
    left_columns: list[Any],
    right_columns: list[Any],
    show_ellipsis: bool,
) -> str:
    """Build the ``<thead>`` markup of the HTML table.

    One header cell is produced for every label in ``left_columns``, then an
    optional ``...`` placeholder cell, then one cell for every label in
    ``right_columns``. Labels listed in ``orderable_columns`` get the
    ``sortable`` class, and every label is HTML-escaped.
    """

    def header_cell(label) -> str:
        # Non-sortable headers keep an empty attribute slot ("<th >").
        if label in orderable_columns:
            attrs = 'class="sortable"'
        else:
            attrs = ""
        return (
            f'      <th {attrs}><div class="bf-header-content">'
            f"{html.escape(str(label))}</div></th>"
        )

    lines = ["  <thead>", "    <tr>"]
    lines += [header_cell(label) for label in left_columns]
    if show_ellipsis:
        lines.append(
            '      <th><div class="bf-header-content" style="cursor: default;">...</div></th>'
        )
    lines += [header_cell(label) for label in right_columns]
    lines += ["    </tr>", "  </thead>"]
    return "\n".join(lines)


def _render_table_body(
    dataframe: pd.DataFrame,
    left_columns: list[Any],
    right_columns: list[Any],
    show_ellipsis: bool,
) -> str:
    """Render the ``<tbody>`` of the HTML table.

    Args:
        dataframe: The data to render; every row becomes one ``<tr>``.
        left_columns: Labels of the columns rendered before the ellipsis.
        right_columns: Labels of the columns rendered after the ellipsis.
        show_ellipsis: Whether to emit a ``...`` cell between the two groups.

    Returns:
        The body markup, one element per line.

    Cells of numeric columns are right-aligned, all others left-aligned.
    Scalar missing values are rendered as ``<NA>``, floats are formatted with
    ``options.display.precision`` decimals, and everything else is rendered
    via ``str``. All cell text is HTML-escaped.
    """
    precision = options.display.precision

    def prepare(col_names):
        """Pair each column's values with its opening ``<td>`` tag."""
        prepared = []
        for col_name in col_names:
            column = dataframe[col_name]
            if isinstance(column, pd.DataFrame):
                # Duplicate labels select several columns; render the first
                # match rather than failing on the ambiguous selection.
                column = column.iloc[:, 0]
            align = "right" if _is_dtype_numeric(column.dtype) else "left"
            prepared.append((column, f'      <td class="cell-align-{align}">'))
        return prepared

    def render_cell(column, td_open, row_position) -> str:
        # Read the value from its own column. Building a row with
        # ``dataframe.iloc[i]`` coerces every value to one common dtype, which
        # turns integers into floats whenever the frame is all-numeric and
        # contains a float column.
        value = column.iloc[row_position]

        # TODO(b/438181139): Consider semi-exploding ARRAY/STRUCT columns
        # into multiple rows/columns like the BQ UI does.
        if pandas.api.types.is_scalar(value) and pd.isna(value):
            return f'{td_open}<em class="null-value">&lt;NA&gt;</em></td>'
        if isinstance(value, float):
            cell_content = f"{value:.{precision}f}"
        else:
            cell_content = str(value)
        return f"{td_open}{html.escape(cell_content)}</td>"

    # Column lookup and alignment do not depend on the row, so do them once.
    left_cells = prepare(left_columns)
    right_cells = prepare(right_columns)

    body_parts = ["  <tbody>"]
    for row_position in range(len(dataframe)):
        body_parts.append("    <tr>")
        body_parts.extend(
            render_cell(column, td_open, row_position)
            for column, td_open in left_cells
        )
        if show_ellipsis:
            # Ellipsis cell
            body_parts.append('      <td class="cell-align-left">...</td>')
        body_parts.extend(
            render_cell(column, td_open, row_position)
            for column, td_open in right_cells
        )
        body_parts.append("    </tr>")
    body_parts.append("  </tbody>")
    return "\n".join(body_parts)


def _obj_ref_rt_to_html(obj_ref_rt: str) -> str:
    """Turn a JSON-encoded object reference into an HTML snippet.

    Args:
        obj_ref_rt: JSON text containing an ``objectref`` entry with
            ``details``, ``uri`` and ``authorizer`` keys and, for images, an
            ``access_urls.read_url`` entry.

    Returns:
        An ``<img>`` tag when the GCS metadata reports a content type that
        starts with ``image``; the tag carries the configured
        ``blob_display_width`` / ``blob_display_height`` when they are set.
        Otherwise a ``uri: ..., authorizer: ...`` text summary.

    The result is inserted into a table that is rendered without escaping, so
    every value taken from the payload is HTML-escaped here.
    """
    obj_ref_rt_json = json.loads(obj_ref_rt)
    obj_ref = obj_ref_rt_json["objectref"]
    obj_ref_details = obj_ref["details"]
    if "gcs_metadata" in obj_ref_details:
        gcs_metadata = obj_ref_details["gcs_metadata"]
        content_type = typing.cast(str, gcs_metadata.get("content_type", ""))
        if content_type.startswith("image"):
            size_str = ""
            if options.display.blob_display_width:
                size_str = f' width="{options.display.blob_display_width}"'
            if options.display.blob_display_height:
                size_str = size_str + f' height="{options.display.blob_display_height}"'
            # quote=True keeps quotes and ampersands in the URL from
            # terminating or corrupting the src attribute.
            url = html.escape(
                str(obj_ref_rt_json["access_urls"]["read_url"]), quote=True
            )
            return f'<img src="{url}"{size_str}>'

    uri = html.escape(str(obj_ref["uri"]))
    authorizer = html.escape(str(obj_ref["authorizer"]))
    return f"uri: {uri}, authorizer: {authorizer}"


def create_html_representation(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
    pandas_df: pd.DataFrame,
    total_rows: int,
    total_columns: int,
    blob_cols: list[str],
) -> str:
    """Build the HTML shown for a DataFrame or Series.

    For a Series, the first column of ``pandas_df`` is rendered and a row
    count is appended when ``total_rows`` exceeds the rows available locally.
    For a DataFrame, ``pandas_df`` is rendered (with blob columns converted to
    HTML when blob display is enabled) and a total rows/columns summary is
    appended.
    """
    from bigframes.series import Series

    opts = options.display
    with display_options.pandas_repr(opts):
        if not isinstance(obj, Series):
            # DataFrame path.
            # TODO(shuowei, b/464053870): Escaping HTML would be useful, but
            # `escape=False` is needed to show images. We may need to implement
            # a full-fledged repr module to better support types not in pandas.
            if options.display.blob_display and blob_cols:
                blob_formatters = dict.fromkeys(blob_cols, _obj_ref_rt_to_html)

                # Lift the column width limit so image URLs are not truncated.
                with pandas.option_context("display.max_colwidth", None):
                    frame_html = pandas_df.to_html(
                        escape=False,
                        notebook=True,
                        max_rows=pandas.get_option("display.max_rows"),
                        max_cols=pandas.get_option("display.max_columns"),
                        show_dimensions=pandas.get_option("display.show_dimensions"),
                        formatters=blob_formatters,  # type: ignore
                    )
            else:
                # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
                frame_html = pandas_df._repr_html_()  # type:ignore

            summary = f"[{total_rows} rows x {total_columns} columns in total]"
            return frame_html + summary

        # Series path: not every pandas object offers _repr_html_, so fall
        # back to pre-formatted text to make sure something is displayed.
        first_column = pandas_df.iloc[:, 0]
        try:
            # TODO(b/464053870): Support rich display for blob Series.
            series_html = first_column._repr_html_()
        except AttributeError:
            series_html = f"<pre>{first_column.to_string()}</pre>"

        if total_rows is not None and total_rows > len(pandas_df):
            series_html += f"<p>[{total_rows} rows]</p>"
        return series_html


def _get_obj_metadata(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
) -> tuple[bool, bool]:
    """Return ``(is_series, has_index)`` for ``obj``.

    A Series has an index when its block lists at least one index column; for
    a DataFrame the object's own ``_has_index`` flag is used.
    """
    from bigframes.series import Series

    if isinstance(obj, Series):
        return True, len(obj._block.index_columns) > 0
    return False, obj._has_index


def get_anywidget_bundle(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
    include=None,
    exclude=None,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Helper method to create and return the anywidget mimebundle.
    This function encapsulates the logic for anywidget display.

    Args:
        obj: The DataFrame or Series to display.
        include: Forwarded to the widget's ``_repr_mimebundle_``.
        exclude: Forwarded to the widget's ``_repr_mimebundle_``.

    Returns:
        A ``(bundle, metadata)`` pair. The bundle is the widget's mimebundle
        with ``text/html`` and ``text/plain`` entries set from the widget's
        cached data; metadata is empty when the widget does not supply any.
    """
    from bigframes import display
    from bigframes.series import Series

    # A Series has no blob columns to format, so start from an empty list
    # instead of probing locals() afterwards.
    blob_cols: list[str] = []
    if isinstance(obj, Series):
        df = obj.to_frame()
    else:
        df, blob_cols = obj._get_display_df_and_blob_cols()

    widget = display.TableWidget(df)
    widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)

    # The widget may return either a bare bundle or a (bundle, metadata) tuple.
    if isinstance(widget_repr_result, tuple):
        widget_repr, widget_metadata = widget_repr_result
    else:
        widget_repr = widget_repr_result
        widget_metadata = {}

    # Copy so that adding entries does not touch the widget's own mapping.
    widget_repr = dict(widget_repr)

    # Use cached data from widget to render HTML and plain text versions.
    cached_pd = widget._cached_data
    total_rows = widget.row_count
    total_columns = len(df.columns)

    widget_repr["text/html"] = create_html_representation(
        obj,
        cached_pd,
        total_rows,
        total_columns,
        blob_cols,
    )
    is_series, has_index = _get_obj_metadata(obj)
    widget_repr["text/plain"] = plaintext.create_text_representation(
        cached_pd,
        total_rows,
        is_series=is_series,
        has_index=has_index,
        column_count=0 if is_series else total_columns,
    )

    return widget_repr, widget_metadata


def repr_mimebundle_deferred(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
) -> dict[str, str]:
    """Return the mimebundle used in deferred repr mode.

    The bundle describes the dry-run query job of ``obj`` as plain text and as
    HTML.
    """
    # Run the dry run once and format the same job twice, rather than issuing
    # a second dry run for the HTML entry.
    dry_run_job = obj._compute_dry_run()
    return {
        "text/plain": formatter.repr_query_job(dry_run_job),
        "text/html": formatter.repr_query_job_html(dry_run_job),
    }


def repr_mimebundle_head(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
) -> dict[str, str]:
    """Return the static HTML and plain-text mimebundle for ``obj``.

    At most ``options.display.max_rows`` rows are requested, the resulting
    query job is recorded on ``obj``, and the fetched data is rendered both as
    HTML and as text.
    """
    from bigframes.series import Series

    opts = options.display
    blob_cols: list[str] = []
    if isinstance(obj, Series):
        source_block = obj._block
    else:
        df, blob_cols = obj._get_display_df_and_blob_cols()
        source_block = df._block
    pandas_df, row_count, query_job = source_block.retrieve_repr_request_results(
        opts.max_rows
    )

    obj._set_internal_query_job(query_job)
    column_count = len(pandas_df.columns)

    html_string = create_html_representation(
        obj, pandas_df, row_count, column_count, blob_cols
    )

    is_series, has_index = _get_obj_metadata(obj)
    text_representation = plaintext.create_text_representation(
        pandas_df,
        row_count,
        is_series=is_series,
        has_index=has_index,
        column_count=0 if is_series else len(pandas_df.columns),
    )

    return {"text/html": html_string, "text/plain": text_representation}


def repr_mimebundle(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
    include=None,
    exclude=None,
):
    """Produce the mimebundle IPython/Jupyter uses to display ``obj``.

    The configured ``repr_mode`` picks the output: ``"deferred"`` returns the
    dry-run description, ``"anywidget"`` returns the interactive widget bundle
    (falling back to the static one with a warning on ``ImportError``), and
    any other mode returns the static head representation.
    """
    # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and
    # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.

    mode = options.display.repr_mode
    if mode == "deferred":
        return repr_mimebundle_deferred(obj)
    if mode != "anywidget":
        return repr_mimebundle_head(obj)

    try:
        with bigframes.option_context(
            "display.progress_bar", None
        ), warnings.catch_warnings():
            silenced = (bigframes.exceptions.JSONDtypeWarning, FutureWarning)
            for warning_category in silenced:
                warnings.simplefilter("ignore", category=warning_category)
            return get_anywidget_bundle(obj, include=include, exclude=exclude)
    except ImportError:
        # Anywidget is an optional dependency, so warn rather than fail.
        # TODO(shuowei): When Anywidget becomes the default for all repr modes,
        # remove this warning.
        warnings.warn(
            "Anywidget mode is not available. "
            "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
            f"Falling back to static HTML. Error: {traceback.format_exc()}"
        )

    return repr_mimebundle_head(obj)