# NOTE: This repository was archived by the owner on Apr 1, 2026. It is now read-only.
# File: anywidget.py (257 lines, 207 loc, 9.03 KB)
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from importlib import resources
import functools
import math
from typing import Any, Dict, Iterator, List, Optional, Type
import uuid
import pandas as pd
import bigframes
from bigframes.core import blocks
import bigframes.dataframe
import bigframes.display.html
# anywidget and traitlets are optional dependencies. We don't want the import of
# this module to fail if they aren't installed, though. Instead, we try to
# limit the surface that these packages could affect. This makes unit testing
# easier and ensures we don't accidentally make these required packages.
#
# NOTE(review): TableWidget's class body calls ``traitlets.*`` at class
# definition time, so when these imports fail the module still raises
# NameError while defining the class. Confirm whether callers guard the import
# of this module, or whether the trait definitions need a fallback.
try:
    import anywidget
    import traitlets
except Exception:
    # Any failure while importing the optional packages (not only a missing
    # package) is treated as "not installed".
    ANYWIDGET_INSTALLED = False
else:
    ANYWIDGET_INSTALLED = True

# Base class for TableWidget: the real anywidget base when the optional
# packages imported, otherwise plain ``object`` so the ``class`` statement's
# base expression can still be evaluated.
WIDGET_BASE: Type[Any] = anywidget.AnyWidget if ANYWIDGET_INSTALLED else object
class TableWidget(WIDGET_BASE):
    """An interactive, paginated table widget for BigFrames DataFrames.

    This widget provides a user-friendly way to display and navigate through
    large BigQuery DataFrames within a Jupyter environment.

    Traits tagged ``sync=True`` are shared with the frontend:

    * ``page``: zero-indexed page number, clamped by ``_validate_page``.
    * ``page_size``: rows per page, bounded by ``_validate_page_size``.
    * ``row_count``: total number of rows reported by the batches object.
    * ``table_html``: rendered HTML for the current page (or an error box).
    """

    page = traitlets.Int(0).tag(sync=True)
    page_size = traitlets.Int(0).tag(sync=True)
    row_count = traitlets.Int(0).tag(sync=True)
    table_html = traitlets.Unicode().tag(sync=True)
    _initial_load_complete = traitlets.Bool(False).tag(sync=True)
    # Source of page-sized pandas batches; replaced whenever page_size changes.
    _batches: Optional[blocks.PandasBatches] = None
    _error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
        sync=True
    )

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        """Initialize the TableWidget.

        Args:
            dataframe: The Bigframes Dataframe to display in the widget.

        Raises:
            ImportError: If the optional anywidget/traitlets packages could
                not be imported.
        """
        if not ANYWIDGET_INSTALLED:
            raise ImportError(
                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
            )

        self._dataframe = dataframe

        super().__init__()

        # Initialize attributes that might be needed by observers first
        self._table_id = str(uuid.uuid4())
        self._all_data_loaded = False
        self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
        self._cached_batches: List[pd.DataFrame] = []

        # respect display options for initial page size
        initial_page_size = bigframes.options.display.max_rows

        # set traitlets properties that trigger observers
        # (the observers return early until _initial_load_complete is True)
        self.page_size = initial_page_size

        # len(dataframe) is expensive, since it will trigger a
        # SELECT COUNT(*) query. It is a must have however.
        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
        # before we get here so that the count might already be cached.
        # TODO(b/452747934): Allow row_count to be None and check to see if
        # there are multiple pages and show "page 1 of many" in this case
        self._reset_batches_for_new_page_size()
        if self._batches is None or self._batches.total_rows is None:
            self._error_message = (
                "Could not determine total row count. "
                "Data might be unavailable or an error occurred."
            )
            self.row_count = 0
        else:
            self.row_count = self._batches.total_rows

        # get the initial page
        self._set_table_html()

        # Signals to the frontend that the initial data load is complete.
        # Also used as a guard to prevent observers from firing during initialization.
        self._initial_load_complete = True

    @functools.cached_property
    def _esm(self):
        """Load JavaScript code from external file."""
        return resources.read_text(bigframes.display, "table_widget.js")

    @functools.cached_property
    def _css(self):
        """Load CSS code from external file."""
        return resources.read_text(bigframes.display, "table_widget.css")

    @traitlets.validate("page")
    def _validate_page(self, proposal: Dict[str, Any]) -> int:
        """Validate and clamp the page number to a valid range.

        Args:
            proposal: A dictionary from the traitlets library containing the
                proposed change. The new value is in proposal["value"].

        Returns:
            The validated and clamped page number as an integer. Always 0
            when there are no rows or the page size is 0.
        """
        value = proposal["value"]

        # Without rows (or with a zero page size) only page 0 is meaningful;
        # this also avoids dividing by zero below.
        if self.row_count == 0 or self.page_size == 0:
            return 0

        # Calculate the zero-indexed maximum page number.
        max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)

        # Clamp the proposed value to the valid range [0, max_page].
        return max(0, min(value, max_page))

    @traitlets.validate("page_size")
    def _validate_page_size(self, proposal: Dict[str, Any]) -> int:
        """Validate page size to ensure it's positive and reasonable.

        Args:
            proposal: A dictionary from the traitlets library containing the
                proposed change. The new value is in proposal["value"].

        Returns:
            The validated page size as an integer: the current page size when
            the proposal is not positive, otherwise the proposal capped at 1000.
        """
        value = proposal["value"]

        # Ensure page size is positive and within reasonable bounds
        if value <= 0:
            return self.page_size  # Keep current value

        # Cap at reasonable maximum to prevent performance issues
        max_page_size = 1000
        return min(value, max_page_size)

    def _get_next_batch(self) -> bool:
        """Get the next batch of data from the iterator and append it to the cache.

        Returns:
            True if a batch was successfully loaded, False otherwise (the
            iterator is exhausted, now or on an earlier call).
        """
        if self._all_data_loaded:
            return False

        try:
            batch = next(self._batch_iterator)
        except StopIteration:
            # Remember exhaustion so later calls return without touching the iterator.
            self._all_data_loaded = True
            return False

        self._cached_batches.append(batch)
        return True

    @property
    def _batch_iterator(self) -> Iterator[pd.DataFrame]:
        """Lazily initializes and returns the batch iterator."""
        if self._batch_iter is None:
            if self._batches is None:
                # No batch source available: behave like an empty result.
                self._batch_iter = iter([])
            else:
                self._batch_iter = iter(self._batches)
        return self._batch_iter

    @property
    def _cached_data(self) -> pd.DataFrame:
        """Combine all cached batches into a single DataFrame.

        Returns an empty DataFrame carrying the source dataframe's columns
        when nothing has been cached yet.
        """
        if not self._cached_batches:
            return pd.DataFrame(columns=self._dataframe.columns)
        return pd.concat(self._cached_batches, ignore_index=True)

    def _reset_batches_for_new_page_size(self) -> None:
        """Reset the batch iterator when page size changes.

        Requests fresh batches for the current ``page_size`` and discards the
        cached batches, the iterator, and the exhaustion flag.
        """
        self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
        self._cached_batches = []
        self._batch_iter = None
        self._all_data_loaded = False

    def _set_table_html(self) -> None:
        """Sets the current html data based on the current page and page size.

        When an error message is set, an error box is rendered instead of a
        table and no data is fetched.
        """
        if self._error_message:
            # Local import: the message is a synced trait, so escape it before
            # embedding it in markup.
            import html

            self.table_html = (
                "<div class='bigframes-error-message'>"
                f"{html.escape(self._error_message)}</div>"
            )
            return

        start = self.page * self.page_size
        end = start + self.page_size

        # fetch more data if the requested page is outside our cache.
        # The cached row count is tracked incrementally so the batches are
        # concatenated once below, not after every fetched batch.
        cached_rows = sum(len(batch) for batch in self._cached_batches)
        while cached_rows < end and self._get_next_batch():
            cached_rows += len(self._cached_batches[-1])

        # Get the data for the current page
        page_data = self._cached_data.iloc[start:end]

        # Generate HTML table
        self.table_html = bigframes.display.html.render_html(
            dataframe=page_data,
            table_id=f"table-{self._table_id}",
        )

    @traitlets.observe("page")
    def _page_changed(self, _change: Dict[str, Any]) -> None:
        """Handler for when the page number is changed from the frontend."""
        if not self._initial_load_complete:
            return
        self._set_table_html()

    @traitlets.observe("page_size")
    def _page_size_changed(self, _change: Dict[str, Any]) -> None:
        """Handler for when the page size is changed from the frontend."""
        if not self._initial_load_complete:
            return

        # Reset batches to use the new page size *before* touching `page`:
        # assigning `page` may fire `_page_changed`, which renders from the
        # current batches and must not fetch from the stale iterator.
        self._reset_batches_for_new_page_size()

        # Reset the page to 0 when page size changes to avoid invalid page states
        self.page = 0

        # Update the table display (covers the case where page was already 0
        # and therefore no `page` change notification was emitted).
        self._set_table_html()