Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 10 additions & 25 deletions packages/bigquery-magics/bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,41 +635,26 @@ def _colab_node_expansion_callback(request: dict, params_str: str):
MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000


def _get_graph_name(query_text: str):
"""Returns the name of the graph queried.

Supports GRAPH only, not GRAPH_TABLE.

Args:
query_text: The SQL query text.

Returns:
A (dataset_id, graph_id) tuple, or None if the graph name cannot be determined.
"""
match = re.match(r"\s*GRAPH\s+(\S+)\.(\S+)", query_text, re.IGNORECASE)
if match:
(dataset_id, graph_id) = (match.group(1)), match.group(2)
if "`" in dataset_id or "`" in graph_id:
return None # Backticks in graph name not support for schema view
return (dataset_id, graph_id)
return None


def _get_graph_schema(
bq_client: bigquery.client.Client, query_text: str, query_job: bigquery.job.QueryJob
):
graph_name_result = _get_graph_name(query_text)
if graph_name_result is None:
property_graphs = query_job.referenced_property_graphs
if len(property_graphs) != 1:
return None
dataset_id, graph_id = graph_name_result

graph_ref = property_graphs[0]

info_schema_query = f"""
select PROPERTY_GRAPH_METADATA_JSON
FROM `{query_job.configuration.destination.project}.{dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS
FROM `{graph_ref.project}.{graph_ref.dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS
WHERE PROPERTY_GRAPH_NAME = @graph_id
"""
job_config = bigquery.QueryJobConfig(
query_parameters=[bigquery.ScalarQueryParameter("graph_id", "STRING", graph_id)]
query_parameters=[
bigquery.ScalarQueryParameter(
"graph_id", "STRING", graph_ref.property_graph_id
)
]
)
job_config.use_legacy_sql = False
try:
Expand Down
2 changes: 1 addition & 1 deletion packages/bigquery-magics/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
release_status = "Development Status :: 4 - Beta"
dependencies = [
"db-dtypes>=0.3.0,<2.0.0",
"google-cloud-bigquery >= 3.13.0, <4.0.0",
"google-cloud-bigquery >= 3.41.0, <4.0.0",
"ipywidgets>=7.7.1",
"ipython>=7.23.1",
"ipykernel>=5.5.6",
Expand Down
137 changes: 120 additions & 17 deletions packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,23 +241,78 @@ def test__run_query_dry_run_without_errors_is_silent():
assert len(captured.stdout) == 0


def test__get_graph_name():
    """Plain ``dataset.graph`` names are parsed; everything else gives None."""
    parsed = magics._get_graph_name("GRAPH foo.bar")
    assert parsed == ("foo", "bar")

    # Backtick-quoted names and non-GRAPH statements are not recognized.
    unrecognized_queries = (
        "GRAPH `foo.bar`",
        "GRAPH `foo`.bar",
        "SELECT 1",
    )
    for unrecognized in unrecognized_queries:
        assert magics._get_graph_name(unrecognized) is None


def test__get_graph_schema_exception():
    """An exception raised by the schema lookup query results in None."""
    failing_client = mock.create_autospec(bigquery.Client, instance=True)
    failing_client.query.side_effect = Exception("error")

    referenced_graph = mock.Mock(
        project="my-project",
        dataset_id="dataset",
        property_graph_id="graph",
    )

    job = mock.Mock()
    job.configuration.destination.project = "my-project"
    job.referenced_property_graphs = [referenced_graph]

    schema = magics._get_graph_schema(failing_client, "GRAPH foo.bar", job)

    assert schema is None


def test__get_graph_schema_zero_references():
    """A job that references no property graphs yields no schema."""
    job = mock.Mock()
    job.referenced_property_graphs = []
    client = mock.create_autospec(bigquery.Client, instance=True)

    schema = magics._get_graph_schema(client, "SELECT 1", job)

    assert schema is None


def test__get_graph_schema_two_references():
    """A job that references more than one property graph yields no schema."""
    job = mock.Mock()
    job.referenced_property_graphs = [mock.Mock(), mock.Mock()]
    client = mock.create_autospec(bigquery.Client, instance=True)

    schema = magics._get_graph_schema(client, "SELECT 1", job)

    assert schema is None


def test__get_graph_schema_success():
    """A single referenced graph is looked up and its schema converted.

    Checks the converted schema that is returned, the INFORMATION_SCHEMA
    table named in the issued query, and the single ``graph_id`` query
    parameter carrying the property graph id.
    """
    referenced_graph = mock.Mock()
    referenced_graph.project = "my-project"
    referenced_graph.dataset_id = "dataset"
    referenced_graph.property_graph_id = "graph"

    job = mock.Mock()
    job.referenced_property_graphs = [referenced_graph]

    # Stand-in for the one-row, one-column dataframe holding the schema JSON.
    schema_frame = mock.MagicMock()
    schema_frame.shape = (1, 1)
    schema_frame.iloc.__getitem__.return_value = "schema_json"

    client = mock.create_autospec(bigquery.Client, instance=True)
    client.query.return_value.to_dataframe.return_value = schema_frame

    converter_path = "bigquery_magics.bigquery.graph_server._convert_schema"
    with mock.patch(converter_path) as convert_mock:
        convert_mock.return_value = {"nodes": [], "edges": []}

        schema = magics._get_graph_schema(client, "SELECT 1", job)

    assert schema == {"nodes": [], "edges": []}
    convert_mock.assert_called_once_with("schema_json")

    positional, keyword = client.query.call_args[0], client.query.call_args[1]

    issued_sql = positional[0]
    expected_from = "FROM `my-project.dataset`.INFORMATION_SCHEMA.PROPERTY_GRAPHS"
    assert expected_from in issued_sql

    issued_params = keyword["job_config"].query_parameters
    assert len(issued_params) == 1
    only_param = issued_params[0]
    assert only_param.name == "graph_id"
    assert only_param.value == "graph"


@pytest.mark.skipif(
bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
)
Expand Down Expand Up @@ -417,6 +472,12 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch):
reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present",
)
def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch):
"""If `spanner-graph-notebook` is not installed, the graph visualizer
widget cannot be displayed.
"""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -468,6 +529,10 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_int_result(monkeypatch):
"""Graph visualization of integer scalars is supported."""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -519,6 +584,10 @@ def test_bigquery_graph_int_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_str_result(monkeypatch):
"""Graph visualization of string scalars is supported."""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -570,6 +639,10 @@ def test_bigquery_graph_str_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_json_json_result(monkeypatch):
"""Graph visualization of JSON objects with valid JSON string fields is supported."""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -639,6 +712,9 @@ def test_bigquery_graph_json_json_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_json_result(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -758,6 +834,9 @@ def test_bigquery_graph_json_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_size_exceeds_max(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -813,6 +892,9 @@ def test_bigquery_graph_size_exceeds_max(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -869,6 +951,9 @@ def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_with_args_serialization(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -938,6 +1023,9 @@ def test_bigquery_graph_with_args_serialization(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_colab(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
# Mock the colab module so the code under test uses colab.register_callback(), rather than
# GraphServer.
sys.modules["google.colab"] = mock.Mock()
Expand Down Expand Up @@ -1073,6 +1161,9 @@ def test_colab_node_expansion_callback():
reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present",
)
def test_bigquery_graph_missing_spanner_deps(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -1142,11 +1233,17 @@ def test_add_graph_widget_with_schema(monkeypatch):
query_result = pandas.DataFrame([{"id": 1}], columns=["result"])
query_text = "GRAPH my_dataset.my_graph"

query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"

graph_ref = mock.Mock()
graph_ref.project = "p"
graph_ref.dataset_id = "my_dataset"
graph_ref.property_graph_id = "my_graph"
query_job.referenced_property_graphs = [graph_ref]
Comment on lines +1236 to +1245
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using mock.Mock() is less safe than mock.create_autospec as it doesn't enforce the mocked object's interface. It's better to stick with create_autospec for better test robustness. You can mock the referenced_property_graphs property by setting the underlying _properties dictionary, which is how the property gets its data. This is more aligned with how the actual QueryJob object works.

This feedback also applies to test_add_graph_widget_no_graph_name and test_add_graph_widget_schema_not_found.

Suggested change
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"
graph_ref = mock.Mock()
graph_ref.project = "p"
graph_ref.dataset_id = "my_dataset"
graph_ref.property_graph_id = "my_graph"
query_job.referenced_property_graphs = [graph_ref]
query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"
graph_ref_resource = {
"projectId": "p",
"datasetId": "my_dataset",
"propertyGraphId": "my_graph",
}
query_job._properties = {
"statistics": {
"query": {"referencedPropertyGraphs": [graph_ref_resource]}
}
}


args = mock.Mock()
args.bigquery_api_endpoint = "e"
args.project = "p"
Expand Down Expand Up @@ -1203,11 +1300,13 @@ def test_add_graph_widget_no_graph_name(monkeypatch):
query_result = pandas.DataFrame([{"id": 1}], columns=["result"])
query_text = "SELECT * FROM my_dataset.my_table"

query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"

query_job.referenced_property_graphs = []

args = mock.Mock()
args.bigquery_api_endpoint = "e"
args.project = "p"
Expand Down Expand Up @@ -1244,11 +1343,17 @@ def test_add_graph_widget_schema_not_found(monkeypatch):
query_result = pandas.DataFrame([{"id": 1}], columns=["result"])
query_text = "GRAPH my_dataset.my_graph"

query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"

graph_ref = mock.Mock()
graph_ref.project = "p"
graph_ref.dataset_id = "my_dataset"
graph_ref.property_graph_id = "my_graph"
query_job.referenced_property_graphs = [graph_ref]

args = mock.Mock()
args.bigquery_api_endpoint = "e"
args.project = "p"
Expand Down Expand Up @@ -1293,9 +1398,8 @@ def test_bigquery_magic_default_connection_user_agent():

client_info_arg = conn.call_args[1].get("client_info")
assert client_info_arg is not None
assert (
client_info_arg.user_agent
== f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
assert client_info_arg.user_agent.startswith(
f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
)


Expand Down Expand Up @@ -1611,9 +1715,8 @@ def warning_match(warning):
assert kwargs.get("credentials") is mock_credentials
client_info = kwargs.get("client_info")
assert client_info is not None
assert (
client_info.user_agent
== f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
assert client_info.user_agent.startswith(
f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
)

query_job_mock.to_dataframe.assert_called_once_with(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
from google.cloud.bigquery.table import _EmptyRowIterator
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import _table_arg_to_table_ref
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TableReference, PropertyGraphReference
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery._tqdm_helpers import wait_for_query

Expand Down Expand Up @@ -1332,6 +1332,30 @@ def referenced_tables(self):

return tables

@property
def referenced_property_graphs(self):
    """Return referenced property graphs from job statistics, if present.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_property_graphs

    Returns:
        List[google.cloud.bigquery.table.PropertyGraphReference]:
            one reference per entry under ``referencedPropertyGraphs`` in
            the job statistics, or an empty list when that entry is absent
            (e.g. if the query has not yet completed).
    """
    graph_resources = self._job_statistics().get("referencedPropertyGraphs", ())

    # Each resource carries the project, dataset and graph ids as strings.
    return [
        PropertyGraphReference(
            DatasetReference(resource["projectId"], resource["datasetId"]),
            resource["propertyGraphId"],
        )
        for resource in graph_resources
    ]

@property
def undeclared_query_parameters(self):
"""Return undeclared query parameters from job statistics, if present.
Expand Down
Loading
Loading