refactor: simplify image_blur udf

shuoweil · shuoweil · commit 53a0bea21abd · 2026-02-20T19:18:04.000Z
diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb
@@ -570,16 +570,14 @@
         "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n",
         "\n",
         "@bpd.udf(\n",
-        "    input_types=[str, str, str, int, int, bool],\n",
+        "    input_types=[str, str, int, int],\n",
         "    output_type=str,\n",
         "    dataset=DATASET_ID,\n",
         "    name=\"image_blur\",\n",
         "    bigquery_connection=FULL_CONNECTION_ID,\n",
         "    packages=[\"opencv-python\", \"numpy\", \"requests\"],\n",
         ")\n",
-        "def image_blur(\n",
-        "    src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n",
-        ") -> str:\n",
+        "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n",
         "    import json\n",
         "    import cv2 as cv\n",
         "    import numpy as np\n",
@@ -602,36 +600,23 @@
         "    kx, ky = int(kx), int(ky)\n",
         "    img_blurred = cv.blur(img, ksize=(kx, ky))\n",
         "      \n",
-        "    ext = ext or \".jpeg\"\n",
-        "    success, encoded = cv.imencode(ext, img_blurred)\n",
+        "    success, encoded = cv.imencode(\".jpeg\", img_blurred)\n",
         "    if not success:\n",
-        "        raise ValueError(f\"cv.imencode failed for extension {ext}\")\n",
+        "        raise ValueError(\"cv.imencode failed\")\n",
         "      \n",
         "    # Handle two output modes\n",
         "    if dst_rt:  # GCS/Series output mode\n",
         "        dst_obj = json.loads(dst_rt)\n",
         "        dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n",
         "          \n",
-        "        ext_ct = ext.replace(\".\", \"\")\n",
-        "        ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n",
-        "        ext_ct = ext_mappings.get(ext_ct, ext_ct)\n",
-        "        content_type = \"image/\" + ext_ct\n",
-        "          \n",
-        "        session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n",
+        "        session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n",
         "          \n",
         "        uri = dst_obj[\"objectref\"][\"uri\"]\n",
-        "        return json.dumps({\"status\": \"\", \"content\": uri}) if verbose else uri\n",
+        "        return uri\n",
         "                  \n",
         "    else:  # BigQuery bytes output mode  \n",
         "        image_bytes = encoded.tobytes()\n",
-        "        if verbose:\n",
-        "            return json.dumps({\n",
-        "                \"status\": \"\",\n",
-        "                \"content\": base64.b64encode(image_bytes).decode(),\n",
-        "                \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n",
-        "            })\n",
-        "        else:  \n",
-        "            return base64.b64encode(image_bytes).decode()\n",
+        "        return base64.b64encode(image_bytes).decode()\n",
         "\n",
         "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n",
         "    import os\n",
@@ -644,10 +629,9 @@
         "    df_transform = bpd.DataFrame({\n",
         "        \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n",
         "        \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n",
-        "        \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n",
         "    })\n",
-        "    res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n",
-        "        udf, axis=1, args=(*args, verbose)\n",
+        "    res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n",
+        "        udf, axis=1, args=args\n",
         "    )\n",
         "    return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n",
         "\n",
@@ -659,110 +643,6 @@
         "df_image[[\"image\", \"blurred\"]]"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "### Using `verbose` mode for detailed output\n",
-        "\n",
-        "All multimodal functions support a `verbose` parameter, which defaults to `False`.\n",
-        "\n",
-        "*   When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\n",
-        "*   When `verbose=True`, the function returns a `STRUCT` containing two fields:\n",
-        "    *   `content`: The main result of the operation.\n",
-        "    *   `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n",
-        "\n",
-        "Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 9,
-      "metadata": {},
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n",
-            "  warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "<div>\n",
-              "<style scoped>\n",
-              "    .dataframe tbody tr th:only-of-type {\n",
-              "        vertical-align: middle;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe tbody tr th {\n",
-              "        vertical-align: top;\n",
-              "    }\n",
-              "\n",
-              "    .dataframe thead th {\n",
-              "        text-align: right;\n",
-              "    }\n",
-              "</style>\n",
-              "<table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              "    <tr style=\"text-align: right;\">\n",
-              "      <th></th>\n",
-              "      <th>blurred_verbose</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <th>0</th>\n",
-              "      <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>1</th>\n",
-              "      <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>2</th>\n",
-              "      <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>3</th>\n",
-              "      <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <th>4</th>\n",
-              "      <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table>\n",
-              "<p>5 rows × 1 columns</p>\n",
-              "</div>[5 rows x 1 columns in total]"
-            ],
-            "text/plain": [
-              "                                     blurred_verbose\n",
-              "0  {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
-              "1  {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
-              "2  {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
-              "3  {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
-              "4  {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n",
-              "\n",
-              "[5 rows x 1 columns]"
-            ]
-          },
-          "execution_count": 9,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "df_image[\"blurred_verbose\"] = apply_transformation(\n",
-        "    df_image[\"image\"],\n",
-        "    f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n",
-        "    image_blur, 20, 20, verbose=True\n",
-        ")\n",
-        "df_image[[\"blurred_verbose\"]]"
-      ]
-    },
     {
       "cell_type": "markdown",
       "metadata": {