Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 53a0bea

Browse files
committed
refactor: simplify image_blur udf
1 parent 0ca7aba commit 53a0bea

File tree

1 file changed

+9
-129
lines changed

1 file changed

+9
-129
lines changed

notebooks/multimodal/multimodal_dataframe.ipynb

Lines changed: 9 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -570,16 +570,14 @@
570570
"FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n",
571571
"\n",
572572
"@bpd.udf(\n",
573-
" input_types=[str, str, str, int, int, bool],\n",
573+
" input_types=[str, str, int, int],\n",
574574
" output_type=str,\n",
575575
" dataset=DATASET_ID,\n",
576576
" name=\"image_blur\",\n",
577577
" bigquery_connection=FULL_CONNECTION_ID,\n",
578578
" packages=[\"opencv-python\", \"numpy\", \"requests\"],\n",
579579
")\n",
580-
"def image_blur(\n",
581-
" src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n",
582-
") -> str:\n",
580+
"def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n",
583581
" import json\n",
584582
" import cv2 as cv\n",
585583
" import numpy as np\n",
@@ -602,36 +600,23 @@
602600
" kx, ky = int(kx), int(ky)\n",
603601
" img_blurred = cv.blur(img, ksize=(kx, ky))\n",
604602
" \n",
605-
" ext = ext or \".jpeg\"\n",
606-
" success, encoded = cv.imencode(ext, img_blurred)\n",
603+
" success, encoded = cv.imencode(\".jpeg\", img_blurred)\n",
607604
" if not success:\n",
608-
" raise ValueError(f\"cv.imencode failed for extension {ext}\")\n",
605+
" raise ValueError(\"cv.imencode failed\")\n",
609606
" \n",
610607
" # Handle two output modes\n",
611608
" if dst_rt: # GCS/Series output mode\n",
612609
" dst_obj = json.loads(dst_rt)\n",
613610
" dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n",
614611
" \n",
615-
" ext_ct = ext.replace(\".\", \"\")\n",
616-
" ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n",
617-
" ext_ct = ext_mappings.get(ext_ct, ext_ct)\n",
618-
" content_type = \"image/\" + ext_ct\n",
619-
" \n",
620-
" session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n",
612+
" session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n",
621613
" \n",
622614
" uri = dst_obj[\"objectref\"][\"uri\"]\n",
623-
" return json.dumps({\"status\": \"\", \"content\": uri}) if verbose else uri\n",
615+
" return uri\n",
624616
" \n",
625617
" else: # BigQuery bytes output mode \n",
626618
" image_bytes = encoded.tobytes()\n",
627-
" if verbose:\n",
628-
" return json.dumps({\n",
629-
" \"status\": \"\",\n",
630-
" \"content\": base64.b64encode(image_bytes).decode(),\n",
631-
" \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n",
632-
" })\n",
633-
" else: \n",
634-
" return base64.b64encode(image_bytes).decode()\n",
619+
" return base64.b64encode(image_bytes).decode()\n",
635620
"\n",
636621
"def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n",
637622
" import os\n",
@@ -644,10 +629,9 @@
644629
" df_transform = bpd.DataFrame({\n",
645630
" \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n",
646631
" \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n",
647-
" \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n",
648632
" })\n",
649-
" res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n",
650-
" udf, axis=1, args=(*args, verbose)\n",
633+
" res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n",
634+
"        udf, axis=1, args=args\n",
651635
" )\n",
652636
" return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n",
653637
"\n",
@@ -659,110 +643,6 @@
659643
"df_image[[\"image\", \"blurred\"]]"
660644
]
661645
},
662-
{
663-
"cell_type": "markdown",
664-
"metadata": {},
665-
"source": [
666-
"### Using `verbose` mode for detailed output\n",
667-
"\n",
668-
"All multimodal functions support a `verbose` parameter, which defaults to `False`.\n",
669-
"\n",
670-
"* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\n",
671-
"* When `verbose=True`, the function returns a `STRUCT` containing two fields:\n",
672-
" * `content`: The main result of the operation.\n",
673-
" * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n",
674-
"\n",
675-
"Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function."
676-
]
677-
},
678-
{
679-
"cell_type": "code",
680-
"execution_count": 9,
681-
"metadata": {},
682-
"outputs": [
683-
{
684-
"name": "stderr",
685-
"output_type": "stream",
686-
"text": [
687-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n",
688-
" warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n"
689-
]
690-
},
691-
{
692-
"data": {
693-
"text/html": [
694-
"<div>\n",
695-
"<style scoped>\n",
696-
" .dataframe tbody tr th:only-of-type {\n",
697-
" vertical-align: middle;\n",
698-
" }\n",
699-
"\n",
700-
" .dataframe tbody tr th {\n",
701-
" vertical-align: top;\n",
702-
" }\n",
703-
"\n",
704-
" .dataframe thead th {\n",
705-
" text-align: right;\n",
706-
" }\n",
707-
"</style>\n",
708-
"<table border=\"1\" class=\"dataframe\">\n",
709-
" <thead>\n",
710-
" <tr style=\"text-align: right;\">\n",
711-
" <th></th>\n",
712-
" <th>blurred_verbose</th>\n",
713-
" </tr>\n",
714-
" </thead>\n",
715-
" <tbody>\n",
716-
" <tr>\n",
717-
" <th>0</th>\n",
718-
" <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
719-
" </tr>\n",
720-
" <tr>\n",
721-
" <th>1</th>\n",
722-
" <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
723-
" </tr>\n",
724-
" <tr>\n",
725-
" <th>2</th>\n",
726-
" <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
727-
" </tr>\n",
728-
" <tr>\n",
729-
" <th>3</th>\n",
730-
" <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
731-
" </tr>\n",
732-
" <tr>\n",
733-
" <th>4</th>\n",
734-
" <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
735-
" </tr>\n",
736-
" </tbody>\n",
737-
"</table>\n",
738-
"<p>5 rows × 1 columns</p>\n",
739-
"</div>[5 rows x 1 columns in total]"
740-
],
741-
"text/plain": [
742-
" blurred_verbose\n",
743-
"0 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
744-
"1 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
745-
"2 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
746-
"3 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
747-
"4 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
748-
"\n",
749-
"[5 rows x 1 columns]"
750-
]
751-
},
752-
"execution_count": 9,
753-
"metadata": {},
754-
"output_type": "execute_result"
755-
}
756-
],
757-
"source": [
758-
"df_image[\"blurred_verbose\"] = apply_transformation(\n",
759-
" df_image[\"image\"],\n",
760-
" f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n",
761-
" image_blur, 20, 20, verbose=True\n",
762-
")\n",
763-
"df_image[[\"blurred_verbose\"]]"
764-
]
765-
},
766646
{
767647
"cell_type": "markdown",
768648
"metadata": {

0 commit comments

Comments
 (0)