1414
1515"""IPython Magics
1616
17- .. function:: ``%%bigquery`` or ``%%bqsql``
17+ .. function:: ``%%bigquery``
1818
1919 IPython cell magic to run a query and display the result as a DataFrame
2020
2121 .. code-block:: python
2222
23- %%bqsql [<destination_var>] [--project <project>] [--use_legacy_sql]
23+ %%bigquery [<destination_var>] [--project <project>] [--use_legacy_sql]
2424 [--verbose] [--params <params>]
2525 <query>
2626
@@ -290,7 +290,6 @@ def _create_dataset_if_necessary(client, dataset_id):
290290 action = "store_true" ,
291291 default = False ,
292292 help = (
293- "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
294293 "Standard SQL if this argument is not used."
295294 ),
296295)
@@ -546,7 +545,9 @@ def _query_with_pandas(query: str, params: List[Any], args: Any):
546545
547546def _create_clients (args : Any ) -> Tuple [bigquery .Client , Any ]:
548547 bq_client = core .create_bq_client (
549- args .project , args .bigquery_api_endpoint , args .location
548+ project = args .project ,
549+ bigquery_api_endpoint = args .bigquery_api_endpoint ,
550+ location = args .location ,
550551 )
551552
552553 # Check and instantiate bq storage client
@@ -629,8 +630,8 @@ def _colab_node_expansion_callback(request: dict, params_str: str):
629630singleton_server_thread : threading .Thread = None
630631
631632
632- MAX_GRAPH_VISUALIZATION_SIZE = 5000000
633- MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100000
633+ MAX_GRAPH_VISUALIZATION_SIZE = 5_000_000
634+ MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000
634635
635636
636637def _estimate_json_size (df : pandas .DataFrame ) -> int :
@@ -671,112 +672,6 @@ def _estimate_json_size(df: pandas.DataFrame) -> int:
671672 return int (key_overhead + structural_overhead + total_val_len )
672673
673674
674- def _convert_schema (schema_json : str ) -> str :
675- """
676- Converts a JSON string from the BigQuery schema format to the format
677- expected by the visualization framework.
678-
679- Args:
680- schema_json: The input JSON string in the BigQuery schema format.
681-
682- Returns:
683- The converted JSON string in the visualization framework format.
684- """
685- data = json .loads (schema_json )
686-
687- graph_id = data .get ("propertyGraphReference" , {}).get (
688- "propertyGraphId" , "SampleGraph"
689- )
690-
691- output = {
692- "catalog" : "" ,
693- "name" : graph_id ,
694- "schema" : "" ,
695- "labels" : [],
696- "nodeTables" : [],
697- "edgeTables" : [],
698- "propertyDeclarations" : [],
699- }
700-
701- labels_dict = {} # name -> set of property names
702- props_dict = {} # name -> type
703-
704- def process_table (table , kind ):
705- name = table .get ("name" )
706- base_table_name = table .get ("dataSourceTable" , {}).get ("tableId" )
707- key_columns = table .get ("keyColumns" , [])
708-
709- label_names = []
710- property_definitions = []
711-
712- for lp in table .get ("labelAndProperties" , []):
713- label = lp .get ("label" )
714- label_names .append (label )
715-
716- if label not in labels_dict :
717- labels_dict [label ] = set ()
718-
719- for prop in lp .get ("properties" , []):
720- prop_name = prop .get ("name" )
721- prop_type = prop .get ("dataType" , {}).get ("typeKind" )
722- prop_expr = prop .get ("expression" )
723-
724- labels_dict [label ].add (prop_name )
725- props_dict [prop_name ] = prop_type
726-
727- property_definitions .append (
728- {
729- "propertyDeclarationName" : prop_name ,
730- "valueExpressionSql" : prop_expr ,
731- }
732- )
733-
734- entry = {
735- "name" : name ,
736- "baseTableName" : base_table_name ,
737- "kind" : kind ,
738- "labelNames" : label_names ,
739- "keyColumns" : key_columns ,
740- "propertyDefinitions" : property_definitions ,
741- }
742-
743- if kind == "EDGE" :
744- src = table .get ("sourceNodeReference" , {})
745- dst = table .get ("destinationNodeReference" , {})
746-
747- entry ["sourceNodeTable" ] = {
748- "nodeTableName" : src .get ("nodeTable" ),
749- "edgeTableColumns" : src .get ("edgeTableColumns" ),
750- "nodeTableColumns" : src .get ("nodeTableColumns" ),
751- }
752- entry ["destinationNodeTable" ] = {
753- "nodeTableName" : dst .get ("nodeTable" ),
754- "edgeTableColumns" : dst .get ("edgeTableColumns" ),
755- "nodeTableColumns" : dst .get ("nodeTableColumns" ),
756- }
757-
758- return entry
759-
760- for nt in data .get ("nodeTables" , []):
761- output ["nodeTables" ].append (process_table (nt , "NODE" ))
762-
763- for et in data .get ("edgeTables" , []):
764- output ["edgeTables" ].append (process_table (et , "EDGE" ))
765-
766- for label_name , prop_names in labels_dict .items ():
767- output ["labels" ].append (
768- {
769- "name" : label_name ,
770- "propertyDeclarationNames" : sorted (list (prop_names )),
771- }
772- )
773-
774- for prop_name , prop_type in props_dict .items ():
775- output ["propertyDeclarations" ].append ({"name" : prop_name , "type" : prop_type })
776-
777- return json .dumps (output , indent = 2 )
778-
779-
780675def _get_graph_name (query_text : str ):
781676 """Returns the name of the graph queried.
782677
@@ -794,25 +689,33 @@ def _get_graph_name(query_text: str):
794689 return None
795690
796691
797- def _get_graph_schema (bq_client : bigquery .client .Client , query_text : str , query_job : bigquery .job .QueryJob ):
692+ def _get_graph_schema (
693+ bq_client : bigquery .client .Client , query_text : str , query_job : bigquery .job .QueryJob
694+ ):
798695 graph_name_result = _get_graph_name (query_text )
799696 if graph_name_result is None :
800697 return None
801698 dataset_id , graph_id = graph_name_result
802699
803- info_schema_query = f'''
700+ info_schema_query = f"""
804701 select PROPERTY_GRAPH_METADATA_JSON
805702 FROM `{ query_job .configuration .destination .project } .{ dataset_id } `.INFORMATION_SCHEMA.PROPERTY_GRAPHS
806703 WHERE PROPERTY_GRAPH_NAME = "{ graph_id } "
807- '''
704+ """
808705 info_schema_results = bq_client .query (info_schema_query ).to_dataframe ()
809706
810707 if info_schema_results .shape == (1 , 1 ):
811- return _convert_schema (info_schema_results .iloc [0 , 0 ])
708+ return graph_server . _convert_schema (info_schema_results .iloc [0 , 0 ])
812709 return None
813710
814711
815- def _add_graph_widget (bq_client : Any , query_result : pandas .DataFrame , query_text : str , query_job : Any , args : Any ):
712+ def _add_graph_widget (
713+ bq_client : Any ,
714+ query_result : pandas .DataFrame ,
715+ query_text : str ,
716+ query_job : Any ,
717+ args : Any ,
718+ ):
816719 try :
817720 from spanner_graphs .graph_visualization import generate_visualization_html
818721 except ImportError as err :
@@ -855,7 +758,7 @@ def _add_graph_widget(bq_client: Any, query_result: pandas.DataFrame, query_text
855758 "location" : args .location ,
856759 }
857760
858- estimated_size = _estimate_json_size ( query_result )
761+ estimated_size = query_result . memory_usage ( index = True , deep = True ). sum ( )
859762 if estimated_size > MAX_GRAPH_VISUALIZATION_SIZE :
860763 IPython .display .display (
861764 IPython .core .display .HTML (
@@ -865,7 +768,7 @@ def _add_graph_widget(bq_client: Any, query_result: pandas.DataFrame, query_text
865768 return
866769
867770 schema = _get_graph_schema (bq_client , query_text , query_job )
868-
771+
869772 table_dict = {
870773 "projectId" : query_job .configuration .destination .project ,
871774 "datasetId" : query_job .configuration .destination .dataset_id ,
0 commit comments