Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 91fdf0d

Browse files
committed
docs: replace internal blob APIs with helper functions in notebook
1 parent 0434f26 commit 91fdf0d

File tree

1 file changed

+49
-5
lines changed

1 file changed

+49
-5
lines changed

notebooks/multimodal/multimodal_dataframe.ipynb

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,51 @@
157157
" runtime = bbq.obj.get_access_url(s, mode=mode)\n",
158158
" \n",
159159
" # 3. Convert the runtime object to a JSON string\n",
160-
" return bbq.to_json_string(runtime)"
160+
" return bbq.to_json_string(runtime)\n",
161+
"\n",
162+
"def get_metadata(series):\n",
163+
" # Fetch metadata and extract GCS metadata from the details JSON field\n",
164+
" metadata_obj = bbq.obj.fetch_metadata(series)\n",
165+
" return bbq.json_query(metadata_obj.struct.field(\"details\"), \"$.gcs_metadata\")\n",
166+
"\n",
167+
"def get_content_type(series):\n",
168+
" return bbq.json_value(get_metadata(series), \"$.content_type\")\n",
169+
"\n",
170+
"def get_size(series):\n",
171+
" return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n",
172+
"\n",
173+
"def get_updated(series):\n",
174+
" return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n",
175+
"\n",
176+
"def display_blob(series, n=3):\n",
177+
" import IPython.display as ipy_display\n",
178+
" import pandas as pd\n",
179+
" import requests\n",
180+
" \n",
181+
" # Retrieve access URLs and content types\n",
182+
" runtime_json = bbq.to_json_string(bbq.obj.get_access_url(series, mode=\"R\"))\n",
183+
" read_url = bbq.json_value(runtime_json, \"$.access_urls.read_url\")\n",
184+
" content_type = get_content_type(series)\n",
185+
" \n",
186+
" # Pull to pandas to display\n",
187+
" pdf = bpd.DataFrame({\"read_url\": read_url, \"content_type\": content_type}).head(n).to_pandas()\n",
188+
" \n",
189+
" width = bigframes.options.display.blob_display_width\n",
190+
" height = bigframes.options.display.blob_display_height\n",
191+
" \n",
192+
" for _, row in pdf.iterrows():\n",
193+
" if pd.isna(row[\"read_url\"]):\n",
194+
" ipy_display.display(\"<NA>\")\n",
195+
" elif pd.isna(row[\"content_type\"]):\n",
196+
" ipy_display.display(requests.get(row[\"read_url\"]).content)\n",
197+
" elif row[\"content_type\"].casefold().startswith(\"image\"):\n",
198+
" ipy_display.display(ipy_display.Image(url=row[\"read_url\"], width=width, height=height))\n",
199+
" elif row[\"content_type\"].casefold().startswith(\"audio\"):\n",
200+
" ipy_display.display(ipy_display.Audio(requests.get(row[\"read_url\"]).content))\n",
201+
" elif row[\"content_type\"].casefold().startswith(\"video\"):\n",
202+
" ipy_display.display(ipy_display.Video(row[\"read_url\"], width=width, height=height))\n",
203+
" else:\n",
204+
" ipy_display.display(requests.get(row[\"read_url\"]).content)"
161205
]
162206
},
163207
{
@@ -461,9 +505,9 @@
461505
"source": [
462506
"# Combine unstructured data with structured data\n",
463507
"df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n",
464-
"df_image[\"content_type\"] = df_image[\"image\"].blob.content_type()\n",
465-
"df_image[\"size\"] = df_image[\"image\"].blob.size()\n",
466-
"df_image[\"updated\"] = df_image[\"image\"].blob.updated()\n",
508+
"df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n",
509+
"df_image[\"size\"] = get_size(df_image[\"image\"])\n",
510+
"df_image[\"updated\"] = get_updated(df_image[\"image\"])\n",
467511
"df_image"
468512
]
469513
},
@@ -536,7 +580,7 @@
536580
],
537581
"source": [
538582
"# filter images and display, you can also display audio and video types\n",
539-
"df_image[df_image[\"author\"] == \"alice\"][\"image\"].blob.display()"
583+
"display_blob(df_image[df_image[\"author\"] == \"alice\"][\"image\"])"
540584
]
541585
},
542586
{

0 commit comments

Comments (0)