Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 11a8b5e

Browse files
committed
refactor: use direct API for audio transcription
1 parent 9f1ba1d commit 11a8b5e

File tree

1 file changed

+54
-41
lines changed

1 file changed

+54
-41
lines changed

notebooks/multimodal/multimodal_dataframe.ipynb

Lines changed: 54 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
},
9292
{
9393
"cell_type": "code",
94-
"execution_count": 3,
94+
"execution_count": 9,
9595
"metadata": {
9696
"colab": {
9797
"base_uri": "https://localhost:8080/"
@@ -1451,99 +1451,112 @@
14511451
"cell_type": "markdown",
14521452
"metadata": {},
14531453
"source": [
1454-
"### 6. Audio transcribe function"
1454+
"### 6. Audio transcription"
14551455
]
14561456
},
14571457
{
14581458
"cell_type": "code",
1459-
"execution_count": 21,
1459+
"execution_count": 10,
14601460
"metadata": {},
1461-
"outputs": [
1462-
{
1463-
"name": "stderr",
1464-
"output_type": "stream",
1465-
"text": [
1466-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1467-
"instead of using `db_dtypes` in the future when available in pandas\n",
1468-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1469-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
1470-
]
1471-
}
1472-
],
1461+
"outputs": [],
14731462
"source": [
14741463
"audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n",
14751464
"df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")"
14761465
]
14771466
},
14781467
{
14791468
"cell_type": "code",
1480-
"execution_count": 22,
1469+
"execution_count": 11,
14811470
"metadata": {},
14821471
"outputs": [
14831472
{
14841473
"name": "stderr",
14851474
"output_type": "stream",
14861475
"text": [
1487-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1488-
"instead of using `db_dtypes` in the future when available in pandas\n",
1489-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1490-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n",
1491-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1476+
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
14921477
"instead of using `db_dtypes` in the future when available in pandas\n",
14931478
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
14941479
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
14951480
]
14961481
},
14971482
{
14981483
"data": {
1484+
"text/html": [
1485+
"<pre>0 Now, as all books, not primarily intended as p...</pre>"
1486+
],
14991487
"text/plain": [
15001488
"0 Now, as all books, not primarily intended as p...\n",
15011489
"Name: transcribed_content, dtype: string"
15021490
]
15031491
},
1504-
"execution_count": 22,
1492+
"execution_count": 11,
15051493
"metadata": {},
15061494
"output_type": "execute_result"
15071495
}
15081496
],
15091497
"source": [
1510-
"transcribed_series = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=False)\n",
1498+
"import bigframes.bigquery as bbq\n",
1499+
"import bigframes.operations as ops\n",
1500+
"\n",
1501+
"# `df['audio'].blob.audio_transcribe` is a convenience wrapper around `bigframes.bigquery.ai.generate`.\n",
1502+
"# Here's how to perform the same operation directly:\n",
1503+
"\n",
1504+
"audio_series = df['audio']\n",
1505+
"prompt_text = (\n",
1506+
" \"**Task:** Transcribe the provided audio. **Instructions:** - Your response \"\n",
1507+
" \"must contain only the verbatim transcription of the audio. - Do not include \"\n",
1508+
" \"any introductory text, summaries, or conversational filler in your response. \"\n",
1509+
" \"The output should begin directly with the first word of the audio.\"\n",
1510+
")\n",
1511+
"\n",
1512+
"# Convert the audio series to the runtime representation required by the model.\n",
1513+
"# This involves fetching metadata and getting a signed access URL.\n",
1514+
"audio_metadata = audio_series._apply_unary_op(ops.obj_fetch_metadata_op)\n",
1515+
"audio_runtime = audio_metadata._apply_unary_op(ops.ObjGetAccessUrl(mode=\"R\"))\n",
1516+
"\n",
1517+
"transcribed_results = bbq.ai.generate(\n",
1518+
" prompt=(prompt_text, audio_runtime),\n",
1519+
" endpoint=\"gemini-2.0-flash-001\",\n",
1520+
" model_params={\"generationConfig\": {\"temperature\": 0.0}},\n",
1521+
")\n",
1522+
"\n",
1523+
"transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n",
15111524
"transcribed_series"
15121525
]
15131526
},
15141527
{
15151528
"cell_type": "code",
1516-
"execution_count": 23,
1529+
"execution_count": 12,
15171530
"metadata": {},
15181531
"outputs": [
1519-
{
1520-
"name": "stderr",
1521-
"output_type": "stream",
1522-
"text": [
1523-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1524-
"instead of using `db_dtypes` in the future when available in pandas\n",
1525-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1526-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n",
1527-
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
1528-
"instead of using `db_dtypes` in the future when available in pandas\n",
1529-
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
1530-
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
1531-
]
1532-
},
15331532
{
15341533
"data": {
1534+
"text/html": [
1535+
"<pre>0 {'status': '', 'content': 'Now, as all books, ...</pre>"
1536+
],
15351537
"text/plain": [
15361538
"0 {'status': '', 'content': 'Now, as all books, ...\n",
15371539
"Name: transcription_results, dtype: struct<status: string, content: string>[pyarrow]"
15381540
]
15391541
},
1540-
"execution_count": 23,
1542+
"execution_count": 12,
15411543
"metadata": {},
15421544
"output_type": "execute_result"
15431545
}
15441546
],
15451547
"source": [
1546-
"transcribed_series_verbose = df['audio'].blob.audio_transcribe(model_name=\"gemini-2.0-flash-001\", verbose=True)\n",
1548+
"# To get verbose results (including status), we can extract both fields from the result struct.\n",
1549+
"transcribed_content_series = transcribed_results.struct.field(\"result\")\n",
1550+
"transcribed_status_series = transcribed_results.struct.field(\"status\")\n",
1551+
"\n",
1552+
"transcribed_series_verbose = bpd.DataFrame(\n",
1553+
" {\n",
1554+
" \"status\": transcribed_status_series,\n",
1555+
" \"content\": transcribed_content_series,\n",
1556+
" }\n",
1557+
")\n",
1558+
"# Pack both columns into a single struct Series, matching the shape that audio_transcribe(verbose=True) returned.\n",
1559+
"transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n",
15471560
"transcribed_series_verbose"
15481561
]
15491562
}
@@ -1567,7 +1580,7 @@
15671580
"name": "python",
15681581
"nbconvert_exporter": "python",
15691582
"pygments_lexer": "ipython3",
1570-
"version": "3.10.18"
1583+
"version": "3.13.0"
15711584
}
15721585
},
15731586
"nbformat": 4,

0 commit comments

Comments
 (0)