|
570 | 570 | "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", |
571 | 571 | "\n", |
572 | 572 | "@bpd.udf(\n", |
573 | | - " input_types=[str, str, str, int, int, bool],\n", |
| 573 | + " input_types=[str, str, int, int],\n", |
574 | 574 | " output_type=str,\n", |
575 | 575 | " dataset=DATASET_ID,\n", |
576 | 576 | " name=\"image_blur\",\n", |
577 | 577 | " bigquery_connection=FULL_CONNECTION_ID,\n", |
578 | 578 | " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", |
579 | 579 | ")\n", |
580 | | - "def image_blur(\n", |
581 | | - " src_rt: str, dst_rt: str, ext: str, kx: int, ky: int, verbose: bool\n", |
582 | | - ") -> str:\n", |
| 580 | + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", |
583 | 581 | " import json\n", |
584 | 582 | " import cv2 as cv\n", |
585 | 583 | " import numpy as np\n", |
|
602 | 600 | " kx, ky = int(kx), int(ky)\n", |
603 | 601 | " img_blurred = cv.blur(img, ksize=(kx, ky))\n", |
604 | 602 | " \n", |
605 | | - " ext = ext or \".jpeg\"\n", |
606 | | - " success, encoded = cv.imencode(ext, img_blurred)\n", |
| 603 | + " success, encoded = cv.imencode(\".jpeg\", img_blurred)\n", |
607 | 604 | " if not success:\n", |
608 | | - " raise ValueError(f\"cv.imencode failed for extension {ext}\")\n", |
| 605 | + " raise ValueError(\"cv.imencode failed\")\n", |
609 | 606 | " \n", |
610 | 607 | " # Handle two output modes\n", |
611 | 608 | " if dst_rt: # GCS/Series output mode\n", |
612 | 609 | " dst_obj = json.loads(dst_rt)\n", |
613 | 610 | " dst_url = dst_obj[\"access_urls\"][\"write_url\"]\n", |
614 | 611 | " \n", |
615 | | - " ext_ct = ext.replace(\".\", \"\")\n", |
616 | | - " ext_mappings = {\"jpg\": \"jpeg\", \"tif\": \"tiff\"}\n", |
617 | | - " ext_ct = ext_mappings.get(ext_ct, ext_ct)\n", |
618 | | - " content_type = \"image/\" + ext_ct\n", |
619 | | - " \n", |
620 | | - " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": content_type}, timeout=30).raise_for_status()\n", |
| 612 | + " session.put(dst_url, data=encoded.tobytes(), headers={\"Content-Type\": \"image/jpeg\"}, timeout=30).raise_for_status()\n", |
621 | 613 | " \n", |
622 | 614 | " uri = dst_obj[\"objectref\"][\"uri\"]\n", |
623 | | - " return json.dumps({\"status\": \"\", \"content\": uri}) if verbose else uri\n", |
| 615 | + " return uri\n", |
624 | 616 | " \n", |
625 | 617 | " else: # BigQuery bytes output mode \n", |
626 | 618 | " image_bytes = encoded.tobytes()\n", |
627 | | - " if verbose:\n", |
628 | | - " return json.dumps({\n", |
629 | | - " \"status\": \"\",\n", |
630 | | - " \"content\": base64.b64encode(image_bytes).decode(),\n", |
631 | | - " \"content_type\": f\"image/{ext_ct}\" if 'ext_ct' in locals() else \"image/jpeg\"\n", |
632 | | - " })\n", |
633 | | - " else: \n", |
634 | | - " return base64.b64encode(image_bytes).decode()\n", |
| 619 | + " return base64.b64encode(image_bytes).decode()\n", |
635 | 620 | "\n", |
636 | 621 | "def apply_transformation(series, dst_folder, udf, *args, verbose=False):\n", |
637 | 622 | " import os\n", |
|
644 | 629 | " df_transform = bpd.DataFrame({\n", |
645 | 630 | " \"src_rt\": get_runtime_json_str(series, mode=\"R\"),\n", |
646 | 631 | " \"dst_rt\": get_runtime_json_str(dst_blob, mode=\"RW\"),\n", |
647 | | - " \"ext\": dst_uri.str.extract(r\"(\\.[0-9a-zA-Z]+$)\")[0]\n", |
648 | 632 | " })\n", |
649 | | - " res = df_transform[[\"src_rt\", \"dst_rt\", \"ext\"]].apply(\n", |
650 | | - " udf, axis=1, args=(*args, verbose)\n", |
| 633 | + " res = df_transform[[\"src_rt\", \"dst_rt\"]].apply(\n", |
 | 634 | + " udf, axis=1, args=(*args,)\n", |
651 | 635 | " )\n", |
652 | 636 | " return res if verbose else res.str.to_blob(connection=FULL_CONNECTION_ID)\n", |
653 | 637 | "\n", |
|
659 | 643 | "df_image[[\"image\", \"blurred\"]]" |
660 | 644 | ] |
661 | 645 | }, |
662 | | - { |
663 | | - "cell_type": "markdown", |
664 | | - "metadata": {}, |
665 | | - "source": [ |
666 | | - "### Using `verbose` mode for detailed output\n", |
667 | | - "\n", |
668 | | - "All multimodal functions support a `verbose` parameter, which defaults to `False`.\n", |
669 | | - "\n", |
670 | | - "* When `verbose=False` (the default), the function will only return the main content of the result (e.g., the transformed image, the extracted text).\n", |
671 | | - "* When `verbose=True`, the function returns a `STRUCT` containing two fields:\n", |
672 | | - " * `content`: The main result of the operation.\n", |
673 | | - " * `status`: An informational field. If the operation is successful, this will be empty. If an error occurs during the processing of a specific row, this field will contain the error message, allowing the overall job to complete without failing.\n", |
674 | | - "\n", |
675 | | - "Using `verbose=True` is highly recommended for debugging and for workflows where you need to handle potential failures on a row-by-row basis. Let's see it in action with the `image_blur` function." |
676 | | - ] |
677 | | - }, |
678 | | - { |
679 | | - "cell_type": "code", |
680 | | - "execution_count": 9, |
681 | | - "metadata": {}, |
682 | | - "outputs": [ |
683 | | - { |
684 | | - "name": "stderr", |
685 | | - "output_type": "stream", |
686 | | - "text": [ |
687 | | - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", |
688 | | - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n" |
689 | | - ] |
690 | | - }, |
691 | | - { |
692 | | - "data": { |
693 | | - "text/html": [ |
694 | | - "<div>\n", |
695 | | - "<style scoped>\n", |
696 | | - " .dataframe tbody tr th:only-of-type {\n", |
697 | | - " vertical-align: middle;\n", |
698 | | - " }\n", |
699 | | - "\n", |
700 | | - " .dataframe tbody tr th {\n", |
701 | | - " vertical-align: top;\n", |
702 | | - " }\n", |
703 | | - "\n", |
704 | | - " .dataframe thead th {\n", |
705 | | - " text-align: right;\n", |
706 | | - " }\n", |
707 | | - "</style>\n", |
708 | | - "<table border=\"1\" class=\"dataframe\">\n", |
709 | | - " <thead>\n", |
710 | | - " <tr style=\"text-align: right;\">\n", |
711 | | - " <th></th>\n", |
712 | | - " <th>blurred_verbose</th>\n", |
713 | | - " </tr>\n", |
714 | | - " </thead>\n", |
715 | | - " <tbody>\n", |
716 | | - " <tr>\n", |
717 | | - " <th>0</th>\n", |
718 | | - " <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n", |
719 | | - " </tr>\n", |
720 | | - " <tr>\n", |
721 | | - " <th>1</th>\n", |
722 | | - " <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n", |
723 | | - " </tr>\n", |
724 | | - " <tr>\n", |
725 | | - " <th>2</th>\n", |
726 | | - " <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n", |
727 | | - " </tr>\n", |
728 | | - " <tr>\n", |
729 | | - " <th>3</th>\n", |
730 | | - " <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n", |
731 | | - " </tr>\n", |
732 | | - " <tr>\n", |
733 | | - " <th>4</th>\n", |
734 | | - " <td>{\"status\": \"\", \"content\": \"gs://bigframes_blob...</td>\n", |
735 | | - " </tr>\n", |
736 | | - " </tbody>\n", |
737 | | - "</table>\n", |
738 | | - "<p>5 rows × 1 columns</p>\n", |
739 | | - "</div>[5 rows x 1 columns in total]" |
740 | | - ], |
741 | | - "text/plain": [ |
742 | | - " blurred_verbose\n", |
743 | | - "0 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", |
744 | | - "1 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", |
745 | | - "2 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", |
746 | | - "3 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", |
747 | | - "4 {\"status\": \"\", \"content\": \"gs://bigframes_blob...\n", |
748 | | - "\n", |
749 | | - "[5 rows x 1 columns]" |
750 | | - ] |
751 | | - }, |
752 | | - "execution_count": 9, |
753 | | - "metadata": {}, |
754 | | - "output_type": "execute_result" |
755 | | - } |
756 | | - ], |
757 | | - "source": [ |
758 | | - "df_image[\"blurred_verbose\"] = apply_transformation(\n", |
759 | | - " df_image[\"image\"],\n", |
760 | | - " f\"gs://{OUTPUT_BUCKET}/image_blur_transformed_verbose_v2/\",\n", |
761 | | - " image_blur, 20, 20, verbose=True\n", |
762 | | - ")\n", |
763 | | - "df_image[[\"blurred_verbose\"]]" |
764 | | - ] |
765 | | - }, |
766 | 646 | { |
767 | 647 | "cell_type": "markdown", |
768 | 648 | "metadata": { |
|
0 commit comments