Skip to content

Commit 6429457

Browse files
dongyuj1 and copybara-github
authored and committed
feat: Add get_job_info tool to BigQuery toolset
This CL introduces a new tool, get_job_info, to the BigQuery toolset. This tool allows retrieving metadata about a BigQuery job, such as slot usage, job configuration, statistics, and job status. Closes #2928 Co-authored-by: Dongyu Jia <dongyuj@google.com> PiperOrigin-RevId: 825762399
1 parent 72a8d8d commit 6429457

5 files changed

Lines changed: 330 additions & 1 deletion

File tree

contributing/samples/bigquery/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
2121

2222
Fetches metadata about a BigQuery table.
2323

24+
5. `get_job_info`
25+
Fetches metadata about a BigQuery job.
26+
2427
6. `execute_sql`
2528

2629
Runs or dry-runs a SQL query in BigQuery.

src/google/adk/tools/bigquery/bigquery_toolset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ async def get_tools(
8080
metadata_tool.get_table_info,
8181
metadata_tool.list_dataset_ids,
8282
metadata_tool.list_table_ids,
83+
metadata_tool.get_job_info,
8384
query_tool.get_execute_sql(self._tool_settings),
8485
query_tool.forecast,
8586
query_tool.analyze_contribution,

src/google/adk/tools/bigquery/metadata_tool.py

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,297 @@ def get_table_info(
297297
"status": "ERROR",
298298
"error_details": str(ex),
299299
}
300+
301+
302+
def get_job_info(
    project_id: str,
    job_id: str,
    credentials: Credentials,
    settings: BigQueryToolConfig,
) -> dict:
  """Get metadata information about a BigQuery job.

  The returned dictionary is the raw BigQuery job resource, including the
  job configuration (e.g. the original query), job statistics (slot usage,
  bytes processed, query plan, timeline), the job status and job reference.

  Args:
      project_id (str): The Google Cloud project id containing the job.
      job_id (str): The BigQuery job id. Users may provide it in the fully
          qualified form ``project_id:region.job_id`` (e.g.
          ``bigquery-public-data:US.bquxjob_12345678_1234567890``).
      credentials (Credentials): The credentials to use for the request.
      settings (BigQueryToolConfig): The BigQuery tool settings.

  Returns:
      dict: Dictionary representing the properties of the job, for example::

          {
              "configuration": {
                  "jobType": "QUERY",
                  "query": {
                      "query": "SELECT * FROM `p.d.t` LIMIT 1000",
                      "useLegacySql": False,
                      ...
                  },
              },
              "jobReference": {
                  "jobId": "bquxjob_12345678_1234567890",
                  "location": "US",
                  "projectId": "projectid",
              },
              "statistics": {
                  "creationTime": 1761760370152,
                  "totalBytesProcessed": "5597805",
                  "totalSlotMs": "293",
                  "query": {"queryPlan": [...], "timeline": [...], ...},
              },
              "status": {"state": "DONE"},
              "user_email": "abc@google.com",
              ...
          }

      On failure, a dictionary with keys ``"status"`` (``"ERROR"``) and
      ``"error_details"`` is returned instead.
  """
  try:
    bq_client = client.get_bigquery_client(
        project=project_id,
        credentials=credentials,
        location=settings.location,
        user_agent=settings.application_name,
    )
    job = bq_client.get_job(job_id)
    # The public Job accessors only expose a subset of the resource; the
    # private `_properties` dict carries the complete job metadata returned
    # by the API, which is what we want to surface to the model.
    # pylint: disable=protected-access
    return job._properties
  except Exception as ex:
    # Mirror the error contract of the sibling metadata tools (see
    # get_table_info): never raise, always return a structured error dict.
    return {
        "status": "ERROR",
        "error_details": str(ex),
    }

tests/unittests/tools/bigquery/test_bigquery_metadata_tool.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,36 @@ def test_get_table_info_no_default_auth(mock_default_auth, mock_get_table):
136136
mock_default_auth.assert_not_called()
137137

138138

139+
@mock.patch.dict(os.environ, {}, clear=True)
140+
@mock.patch("google.cloud.bigquery.Client.get_job", autospec=True)
141+
@mock.patch("google.auth.default", autospec=True)
142+
def test_get_job_info_no_default_auth(mock_default_auth, mock_get_job):
143+
"""Test get_job_info tool invocation involves no default auth."""
144+
mock_credentials = mock.create_autospec(Credentials, instance=True)
145+
tool_settings = BigQueryToolConfig()
146+
147+
# Simulate the behavior of default auth - on purpose throw exception when
148+
# the default auth is called
149+
mock_default_auth.side_effect = DefaultCredentialsError(
150+
"Your default credentials were not found"
151+
)
152+
153+
mock_get_job.return_value = mock.create_autospec(
154+
bigquery.QueryJob, instance=True
155+
)
156+
result = metadata_tool.get_job_info(
157+
"my_project_id",
158+
"my_job_id",
159+
mock_credentials,
160+
tool_settings,
161+
)
162+
assert result != {
163+
"status": "ERROR",
164+
"error_details": "Your default credentials were not found",
165+
}
166+
mock_default_auth.assert_not_called()
167+
168+
139169
@mock.patch(
140170
"google.adk.tools.bigquery.client.get_bigquery_client", autospec=True
141171
)

tests/unittests/tools/bigquery/test_bigquery_toolset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,15 @@ async def test_bigquery_toolset_tools_default():
4141
tools = await toolset.get_tools()
4242
assert tools is not None
4343

44-
assert len(tools) == 9
44+
assert len(tools) == 10
4545
assert all([isinstance(tool, GoogleTool) for tool in tools])
4646

4747
expected_tool_names = set([
4848
"list_dataset_ids",
4949
"get_dataset_info",
5050
"list_table_ids",
5151
"get_table_info",
52+
"get_job_info",
5253
"execute_sql",
5354
"ask_data_insights",
5455
"forecast",

0 commit comments

Comments
 (0)