Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 86b9140

Browse files
chore: Make graph visualization more scalable (#208)
We now include query results directly in the HTML only when they are smaller than 100 KB. For larger query results, we store only a reference to the destination table in the HTML, and the Python code re-reads the query results from the destination table during the callback. We also added a hard limit on the query result size, beyond which graph visualization is not supported at all (this description says 5 MB; the `MAX_GRAPH_VISUALIZATION_SIZE` constant in this diff is 2,000,000 bytes).

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-magics/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent c46c94a commit 86b9140

File tree

5 files changed

+414
-58
lines changed

5 files changed

+414
-58
lines changed

bigquery_magics/bigquery.py

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@
117117
import IPython # type: ignore
118118
from IPython.core import magic_arguments # type: ignore
119119
from IPython.core.getipython import get_ipython
120-
from google.api_core import client_info
121120
from google.api_core.exceptions import NotFound
122121
from google.cloud import bigquery
123122
from google.cloud.bigquery import exceptions
@@ -126,13 +125,12 @@
126125
from google.cloud.bigquery.job import QueryJobConfig
127126
import pandas
128127

129-
from bigquery_magics import environment
130128
from bigquery_magics import line_arg_parser as lap
131129
import bigquery_magics._versions_helpers
132130
import bigquery_magics.config
133131
import bigquery_magics.graph_server as graph_server
132+
from bigquery_magics import core
134133
import bigquery_magics.pyformat
135-
import bigquery_magics.version
136134

137135
try:
138136
from google.cloud import bigquery_storage # type: ignore
@@ -147,24 +145,6 @@
147145
context = bigquery_magics.config.context
148146

149147

150-
def _get_user_agent():
151-
identities = [
152-
f"ipython-{IPython.__version__}",
153-
f"bigquery-magics/{bigquery_magics.version.__version__}",
154-
]
155-
156-
if environment.is_vscode():
157-
identities.append("vscode")
158-
if environment.is_vscode_google_cloud_code_extension_installed():
159-
identities.append(environment.GOOGLE_CLOUD_CODE_EXTENSION_NAME)
160-
elif environment.is_jupyter():
161-
identities.append("jupyter")
162-
if environment.is_jupyter_bigquery_plugin_installed():
163-
identities.append(environment.BIGQUERY_JUPYTER_PLUGIN_NAME)
164-
165-
return " ".join(identities)
166-
167-
168148
def _handle_error(error, destination_var=None):
169149
"""Process a query execution error.
170150
@@ -565,23 +545,11 @@ def _query_with_pandas(query: str, params: List[Any], args: Any):
565545

566546

567547
def _create_clients(args: Any) -> Tuple[bigquery.Client, Any]:
568-
bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
569-
if args.bigquery_api_endpoint:
570-
if isinstance(bigquery_client_options, dict):
571-
bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint
572-
else:
573-
bigquery_client_options.api_endpoint = args.bigquery_api_endpoint
574-
575-
bq_client = bigquery.Client(
576-
project=args.project or context.project,
577-
credentials=context.credentials,
578-
default_query_job_config=context.default_query_job_config,
579-
client_info=client_info.ClientInfo(user_agent=_get_user_agent()),
580-
client_options=bigquery_client_options,
548+
bq_client = core.create_bq_client(
549+
project=args.project,
550+
bigquery_api_endpoint=args.bigquery_api_endpoint,
581551
location=args.location,
582552
)
583-
if context._connection:
584-
bq_client._connection = context._connection
585553

586554
# Check and instantiate bq storage client
587555
if args.use_bqstorage_api is not None:
@@ -634,7 +602,7 @@ def _handle_result(result, args):
634602

635603
def _colab_query_callback(query: str, params: str):
636604
return IPython.core.display.JSON(
637-
graph_server.convert_graph_data(query_results=json.loads(params))
605+
graph_server.convert_graph_params(json.loads(params))
638606
)
639607

640608

@@ -663,7 +631,11 @@ def _colab_node_expansion_callback(request: dict, params_str: str):
663631
singleton_server_thread: threading.Thread = None
664632

665633

666-
def _add_graph_widget(query_result):
634+
MAX_GRAPH_VISUALIZATION_SIZE = 2_000_000
635+
MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000
636+
637+
638+
def _add_graph_widget(query_result: pandas.DataFrame, query_job: Any, args: Any):
667639
try:
668640
from spanner_graphs.graph_visualization import generate_visualization_html
669641
except ImportError as err:
@@ -700,10 +672,36 @@ def _add_graph_widget(query_result):
700672
port = graph_server.graph_server.port
701673

702674
# Create html to invoke the graph server
675+
args_dict = {
676+
"bigquery_api_endpoint": args.bigquery_api_endpoint,
677+
"project": args.project,
678+
"location": args.location,
679+
}
680+
681+
estimated_size = query_result.memory_usage(index=True, deep=True).sum()
682+
if estimated_size > MAX_GRAPH_VISUALIZATION_SIZE:
683+
IPython.display.display(
684+
IPython.core.display.HTML(
685+
"<big><b>Error:</b> The query result is too large for graph visualization.</big>"
686+
)
687+
)
688+
return
689+
690+
table_dict = {
691+
"projectId": query_job.configuration.destination.project,
692+
"datasetId": query_job.configuration.destination.dataset_id,
693+
"tableId": query_job.configuration.destination.table_id,
694+
}
695+
696+
params_dict = {"destination_table": table_dict, "args": args_dict}
697+
if estimated_size < MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE:
698+
params_dict["query_result"] = json.loads(query_result.to_json())
699+
700+
params_str = json.dumps(params_dict)
703701
html_content = generate_visualization_html(
704702
query="placeholder query",
705703
port=port,
706-
params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'),
704+
params=params_str.replace("\\", "\\\\").replace('"', '\\"'),
707705
)
708706
html_content = html_content.replace(
709707
'"graph_visualization.Query"', '"bigquery.graph_visualization.Query"'
@@ -819,7 +817,7 @@ def _make_bq_query(
819817
result = result.to_dataframe(**dataframe_kwargs)
820818

821819
if args.graph and _supports_graph_widget(result):
822-
_add_graph_widget(result)
820+
_add_graph_widget(result, query_job, args)
823821
return _handle_result(result, args)
824822

825823

@@ -913,7 +911,7 @@ def _make_bqstorage_client(client, client_options):
913911

914912
return client._ensure_bqstorage_client(
915913
client_options=client_options,
916-
client_info=gapic_client_info.ClientInfo(user_agent=_get_user_agent()),
914+
client_info=gapic_client_info.ClientInfo(user_agent=core._get_user_agent()),
917915
)
918916

919917

bigquery_magics/core.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Copyright 2026 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import copy
16+
17+
from google.api_core import client_info
18+
from google.cloud import bigquery
19+
import IPython # type: ignore
20+
21+
from bigquery_magics import environment
22+
import bigquery_magics.config
23+
import bigquery_magics.version
24+
25+
context = bigquery_magics.config.context
26+
27+
28+
def _get_user_agent():
    """Compose the user-agent string sent with BigQuery client requests.

    The string always starts with the IPython and bigquery-magics versions.
    A host-environment token ("vscode" or "jupyter") is appended when one is
    detected, followed by the matching extension/plugin name when that is
    reported as installed.

    Returns:
        str: The identity tokens joined by single spaces.
    """
    tokens = [
        f"ipython-{IPython.__version__}",
        f"bigquery-magics/{bigquery_magics.version.__version__}",
    ]

    # VS Code is checked first; Jupyter detection only runs when it is not VS Code.
    if environment.is_vscode():
        tokens += ["vscode"]
        if environment.is_vscode_google_cloud_code_extension_installed():
            tokens += [environment.GOOGLE_CLOUD_CODE_EXTENSION_NAME]
    elif environment.is_jupyter():
        tokens += ["jupyter"]
        if environment.is_jupyter_bigquery_plugin_installed():
            tokens += [environment.BIGQUERY_JUPYTER_PLUGIN_NAME]

    user_agent = " ".join(tokens)
    return user_agent
44+
45+
46+
def create_bq_client(*, project: str, bigquery_api_endpoint: str, location: str):
    """Build a BigQuery client configured from the magics context.

    Credentials, the default query job config, and the base client options
    are all taken from the module-level ``context``.

    Args:
        project: Project to use for API calls. A falsy value (e.g. None)
            falls back to ``context.project``.
        bigquery_api_endpoint: BigQuery client endpoint. When truthy it
            overrides ``api_endpoint`` on a copy of the context's client
            options; otherwise the options are used as-is.
        location: Cloud region to use for API calls.

    Returns:
        google.cloud.bigquery.client.Client: The BigQuery client.
    """
    # Work on a deep copy so the endpoint override never leaks into the context.
    client_options = copy.deepcopy(context.bigquery_client_options)
    if bigquery_api_endpoint:
        # Client options may be a plain dict or an options object.
        if not isinstance(client_options, dict):
            client_options.api_endpoint = bigquery_api_endpoint
        else:
            client_options["api_endpoint"] = bigquery_api_endpoint

    client_kwargs = dict(
        project=project or context.project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=_get_user_agent()),
        client_options=client_options,
        location=location,
    )
    client = bigquery.Client(**client_kwargs)

    # Reuse the connection stored on the context when one is set.
    if context._connection:
        client._connection = context._connection

    return client

bigquery_magics/graph_server.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
import threading
2020
from typing import Any, Dict, List
2121

22+
from google.cloud import bigquery
23+
24+
from bigquery_magics import core
25+
2226

2327
def execute_node_expansion(params, request):
    """Placeholder for node expansion; always reports that it is unimplemented.

    Args:
        params: Unused.
        request: Unused.

    Returns:
        dict: A payload with a single "error" key describing the limitation.
    """
    not_implemented = {"error": "Node expansion not yet implemented"}
    return not_implemented
@@ -54,7 +58,7 @@ def _stringify_properties(d: Any) -> Any:
5458
return _stringify_value(d)
5559

5660

57-
def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
61+
def _convert_graph_data(query_results: Dict[str, Dict[str, str]]):
5862
"""
5963
Converts graph data to the form expected by the visualization framework.
6064
@@ -143,6 +147,24 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
143147
return {"error": getattr(e, "message", str(e))}
144148

145149

150+
def convert_graph_params(params: Dict[str, Any]):
    """Resolve the query results described by ``params`` and convert them.

    ``params`` either embeds the results under ``"query_result"`` or carries
    a ``"destination_table"`` reference together with the client ``"args"``
    (``project``, ``bigquery_api_endpoint``, ``location``) needed to re-read
    the results from BigQuery.

    Args:
        params: Decoded visualization parameters.

    Returns:
        The value produced by ``_convert_graph_data``. When required
        parameters are missing or the destination table cannot be read, a
        dict of the form ``{"error": message}`` is returned instead of
        raising, matching how ``_convert_graph_data`` reports failures.
    """
    if "query_result" in params:
        return _convert_graph_data(query_results=params["query_result"])

    # Validate the payload up front so a malformed request yields a readable
    # error rather than an unhandled KeyError in the server/callback.
    try:
        client_args = params["args"]
        project = client_args["project"]
        bigquery_api_endpoint = client_args["bigquery_api_endpoint"]
        location = client_args["location"]
        table_repr = params["destination_table"]
    except KeyError as e:
        return {"error": f"Missing graph visualization parameter: {e}"}

    # NOTE(review): the table reference and endpoint come from the request
    # payload; confirm only the notebook frontend can reach this code path.
    try:
        bq_client = core.create_bq_client(
            project=project,
            bigquery_api_endpoint=bigquery_api_endpoint,
            location=location,
        )
        table_ref = bigquery.TableReference.from_api_repr(table_repr)
        # Round-trip through JSON to get the same plain-dict shape that is
        # embedded in the HTML for small results.
        query_results = json.loads(
            bq_client.list_rows(table_ref).to_dataframe().to_json()
        )
    except Exception as e:
        # Response boundary: surface the failure to the caller as data.
        return {"error": getattr(e, "message", str(e))}

    return _convert_graph_data(query_results=query_results)
166+
167+
146168
class GraphServer:
147169
"""
148170
Http server invoked by Javascript to obtain the query results for visualization.
@@ -251,7 +273,9 @@ def handle_post_ping(self):
251273

252274
def handle_post_query(self):
253275
data = self.parse_post_data()
254-
response = convert_graph_data(query_results=json.loads(data["params"]))
276+
params = json.loads(data["params"])
277+
278+
response = convert_graph_params(params)
255279
self.do_data_response(response)
256280

257281
def handle_post_node_expansion(self):

0 commit comments

Comments
 (0)