|
157 | 157 | " runtime = bbq.obj.get_access_url(s, mode=mode)\n", |
158 | 158 | " \n", |
159 | 159 | " # 3. Convert the runtime object to a JSON string\n", |
160 | | - " return bbq.to_json_string(runtime)" |
| 160 | + " return bbq.to_json_string(runtime)\n", |
| 161 | + "\n", |
| 162 | + "def get_metadata(series):\n", |
| 163 | + " # Fetch metadata and extract GCS metadata from the details JSON field\n", |
| 164 | + " metadata_obj = bbq.obj.fetch_metadata(series)\n", |
| 165 | + " return bbq.json_query(metadata_obj.struct.field(\"details\"), \"$.gcs_metadata\")\n", |
| 166 | + "\n", |
| 167 | + "def get_content_type(series):\n", |
| 168 | + " return bbq.json_value(get_metadata(series), \"$.content_type\")\n", |
| 169 | + "\n", |
| 170 | + "def get_size(series):\n", |
| 171 | + " return bbq.json_value(get_metadata(series), \"$.size\").astype(\"Int64\")\n", |
| 172 | + "\n", |
| 173 | + "def get_updated(series):\n", |
| 174 | + " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)\n", |
| 175 | + "\n", |
| 176 | + "def display_blob(series, n=3):\n", |
| 177 | + " import IPython.display as ipy_display\n", |
| 178 | + " import pandas as pd\n", |
| 179 | + " import requests\n", |
| 180 | + " \n", |
| 181 | + " # Retrieve access URLs and content types\n", |
| 182 | + " runtime_json = bbq.to_json_string(bbq.obj.get_access_url(series, mode=\"R\"))\n", |
| 183 | + " read_url = bbq.json_value(runtime_json, \"$.access_urls.read_url\")\n", |
| 184 | + " content_type = get_content_type(series)\n", |
| 185 | + " \n", |
| 186 | + " # Materialize only the first n rows as a local pandas DataFrame for display\n", |
| 187 | + " pdf = bpd.DataFrame({\"read_url\": read_url, \"content_type\": content_type}).head(n).to_pandas()\n", |
| 188 | + " \n", |
| 189 | + " width = bigframes.options.display.blob_display_width\n", |
| 190 | + " height = bigframes.options.display.blob_display_height\n", |
| 191 | + " \n", |
| 192 | + " for _, row in pdf.iterrows():\n", |
| 193 | + " if pd.isna(row[\"read_url\"]):\n", |
| 194 | + " ipy_display.display(\"<NA>\")\n", |
| 195 | + " elif pd.isna(row[\"content_type\"]):\n", |
| 196 | + " ipy_display.display(requests.get(row[\"read_url\"]).content)\n", |
| 197 | + " elif row[\"content_type\"].casefold().startswith(\"image\"):\n", |
| 198 | + " ipy_display.display(ipy_display.Image(url=row[\"read_url\"], width=width, height=height))\n", |
| 199 | + " elif row[\"content_type\"].casefold().startswith(\"audio\"):\n", |
| 200 | + " ipy_display.display(ipy_display.Audio(requests.get(row[\"read_url\"]).content))\n", |
| 201 | + " elif row[\"content_type\"].casefold().startswith(\"video\"):\n", |
| 202 | + " ipy_display.display(ipy_display.Video(row[\"read_url\"], width=width, height=height))\n", |
| 203 | + " else:\n", |
| 204 | + " ipy_display.display(requests.get(row[\"read_url\"]).content)" |
161 | 205 | ] |
162 | 206 | }, |
163 | 207 | { |
|
461 | 505 | "source": [ |
462 | 506 | "# Combine unstructured data with structured data\n", |
463 | 507 | "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", |
464 | | - "df_image[\"content_type\"] = df_image[\"image\"].blob.content_type()\n", |
465 | | - "df_image[\"size\"] = df_image[\"image\"].blob.size()\n", |
466 | | - "df_image[\"updated\"] = df_image[\"image\"].blob.updated()\n", |
| 508 | + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", |
| 509 | + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", |
| 510 | + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", |
467 | 511 | "df_image" |
468 | 512 | ] |
469 | 513 | }, |
|
536 | 580 | ], |
537 | 581 | "source": [ |
538 | 582 | "# Filter images by author and display them; audio and video blobs can be displayed the same way\n", |
539 | | - "df_image[df_image[\"author\"] == \"alice\"][\"image\"].blob.display()" |
| 583 | + "display_blob(df_image[df_image[\"author\"] == \"alice\"][\"image\"])" |
540 | 584 | ] |
541 | 585 | }, |
542 | 586 | { |
|
0 commit comments