Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 01e6b3f

Browse files
committed
feat: Restore blob.exif method
1 parent 8002d81 commit 01e6b3f

File tree

3 files changed

+172
-1
lines changed

3 files changed

+172
-1
lines changed

bigframes/blob/_functions.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,57 @@ def udf(self):
126126
return self._session.read_gbq_function(udf_name)
127127

128128

129-
# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string. │
129+
def exif_func(src_obj_ref_rt: str, verbose: bool) -> str:
    """Extract EXIF metadata from the image referenced by an ObjectRefRuntime.

    The function is self-contained (all imports happen inside the body) so it
    can be shipped on its own together with its third-party requirements.

    Args:
        src_obj_ref_rt: ObjectRefRuntime serialized as a JSON string. The
            image is downloaded from its ``access_urls.read_url`` entry.
        verbose: If True, return a JSON object with ``status`` and
            ``content`` fields; if False, return only the EXIF JSON object.

    Returns:
        A JSON string. With ``verbose=False`` it is the EXIF mapping of tag
        name to value, or ``"{}"`` on any failure. With ``verbose=True`` it is
        ``{"status": ..., "content": ...}`` where ``status`` is an empty
        string on success or ``"<ExceptionType>: <message>"`` on failure, and
        ``content`` is the EXIF mapping encoded as a JSON string.

    Raises:
        Nothing: every exception is caught and reported through the return
        value.
    """
    # Imported before the ``try`` so the error handler below can always rely
    # on ``json`` being bound, whatever fails inside the ``try``.
    import json

    try:
        import io

        from PIL import ExifTags, Image
        import requests
        from requests import adapters

        src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
        src_url = src_obj_ref_rt_json["access_urls"]["read_url"]

        # The context manager closes the session (and its pooled connections)
        # once the image bytes have been fetched.
        with requests.Session() as session:
            session.mount("https://", adapters.HTTPAdapter(max_retries=3))
            response = session.get(src_url, timeout=30)
            response.raise_for_status()
            bts = response.content

        exif_dict = {}
        # Keep the image open while its EXIF mapping is being read, then
        # release it deterministically.
        with Image.open(io.BytesIO(bts)) as image:
            exif_data = image.getexif()
            if exif_data:
                for tag, value in exif_data.items():
                    # Unknown tag ids have no name; fall back to the raw id.
                    tag_name = ExifTags.TAGS.get(tag, tag)
                    try:
                        # Probe only: allow_nan=False makes NaN/Infinity raise
                        # ValueError so they are stringified instead of being
                        # emitted as tokens that are not valid JSON.
                        json.dumps(value, allow_nan=False)
                    except (TypeError, ValueError):
                        # Convert non-serializable types to strings.
                        exif_dict[tag_name] = str(value)
                    else:
                        exif_dict[tag_name] = value

        if verbose:
            return json.dumps({"status": "", "content": json.dumps(exif_dict)})
        return json.dumps(exif_dict)

    except Exception as e:
        # Best-effort contract: never raise, report the failure in-band.
        if verbose:
            error_result = {"status": f"{type(e).__name__}: {str(e)}", "content": "{}"}
            return json.dumps(error_result)
        return "{}"
174+
175+
176+
# Pairs exif_func with the package names "pillow" and "requests", which match
# the third-party imports made inside exif_func.
# NOTE(review): presumably the list is the pip requirements installed for the
# deployed function — confirm against FunctionDef's definition.
exif_func_def = FunctionDef(exif_func, ["pillow", "requests"])
177+
178+
179+
# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string.
130180
def image_blur_func(
131181
src_obj_ref_rt: str,
132182
dst_obj_ref_rt: str,

bigframes/operations/blob.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,76 @@ def get_runtime_json_str(
336336
runtime = self._get_runtime(mode=mode, with_metadata=with_metadata)
337337
return runtime._apply_unary_op(ops.ToJSONString())
338338

339+
def exif(
    self,
    *,
    engine: Literal[None, "pillow"] = None,
    connection: Optional[str] = None,
    max_batching_rows: int = 8192,
    container_cpu: Union[float, int] = 0.33,
    container_memory: str = "512Mi",
    verbose: bool = False,
) -> bigframes.series.Series:
    """Extract EXIF data. Only image types are currently supported.

    Args:
        engine ('pillow' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
        connection (str or None, default None): BQ connection used for function internet transactions. If None, uses the default connection of the session.
        max_batching_rows (int, default 8,192): Max number of rows per batch sent to Cloud Run to execute the function.
        container_cpu (int or float, default 0.33): Number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
        container_memory (str, default "512Mi"): Container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
        verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content.

    Returns:
        bigframes.series.Series: JSON series of key-value pairs if verbose=False, or struct with status and content if verbose=True.

    Raises:
        ValueError: If engine is not 'pillow' (compared case-insensitively).
        RuntimeError: If the UDF output cannot be parsed as JSON.
    """
    engine_is_pillow = engine is not None and engine.casefold() == "pillow"
    if not engine_is_pillow:
        raise ValueError("Must specify the engine, supported value is 'pillow'.")

    import bigframes.bigquery as bbq
    import bigframes.blob._functions as blob_func
    import bigframes.pandas as bpd

    connection = self._resolve_connection(connection)

    # UDF input: the read-mode runtime JSON string plus the verbose flag.
    udf_input = self.get_runtime_json_str(mode="R").to_frame()
    udf_input["verbose"] = verbose

    transform = blob_func.TransformFunction(
        blob_func.exif_func_def,
        session=self._data._block.session,
        connection=connection,
        max_batching_rows=max_batching_rows,
        container_cpu=container_cpu,
        container_memory=container_memory,
    )
    exif_udf = transform.udf()

    raw = self._apply_udf_or_raise_error(udf_input, exif_udf, "EXIF extraction")

    if not verbose:
        # Non-verbose: the UDF output itself is the EXIF JSON document.
        try:
            return bbq.parse_json(raw)
        except Exception as e:
            raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e

    # Verbose: split the UDF output into its "content" and "status" fields.
    try:
        content_text = raw._apply_unary_op(ops.JSONValue(json_path="$.content"))
        exif_content_series = bbq.parse_json(content_text).rename("exif_content")
        exif_status_series = raw._apply_unary_op(
            ops.JSONValue(json_path="$.status")
        )
    except Exception as e:
        raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e

    combined = bpd.DataFrame(
        {"status": exif_status_series, "content": exif_content_series}
    )
    return bbq.struct(combined).rename("exif_results")
408+
339409
def image_blur(
340410
self,
341411
ksize: tuple[int, int],

tests/system/large/blob/test_function.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,57 @@ def images_output_uris(images_output_folder: str) -> list[str]:
5454
]
5555

5656

57+
def test_blob_exif(
    bq_connection: str,
    session: bigframes.Session,
):
    """Non-verbose blob.exif() yields the expected EXIF JSON for the test image."""
    blob_series = session.from_glob_path(
        "gs://bigframes_blob_test/images_exif/*",
        name="blob_col",
        connection=bq_connection,
    )["blob_col"]

    actual = blob_series.blob.exif(
        engine="pillow",
        connection=bq_connection,
        verbose=False,
    )
    expected = bpd.Series(
        ['{"ExifOffset": 47, "Make": "MyCamera"}'],
        session=session,
        dtype=dtypes.JSON_DTYPE,
    )

    # Compare the materialized values only; dtype and index type are not
    # part of the check.
    actual_pd = actual.to_pandas()
    expected_pd = expected.to_pandas()
    pd.testing.assert_series_equal(
        actual_pd,
        expected_pd,
        check_dtype=False,
        check_index_type=False,
    )
81+
82+
83+
def test_blob_exif_verbose(
    bq_connection: str,
    session: bigframes.Session,
):
    """Verbose blob.exif() yields a struct with string status and JSON content."""
    blob_series = session.from_glob_path(
        "gs://bigframes_blob_test/images_exif/*",
        name="blob_col",
        connection=bq_connection,
    )["blob_col"]

    actual = blob_series.blob.exif(
        engine="pillow",
        connection=bq_connection,
        verbose=True,
    )
    assert hasattr(actual, "struct")

    # Exploding the struct should expose one column per field.
    fields = actual.struct.explode()
    for field_name in ("status", "content"):
        assert field_name in fields.columns

    assert fields["status"].dtype == dtypes.STRING_DTYPE
    assert fields["content"].dtype == dtypes.JSON_DTYPE
106+
107+
57108
def test_blob_image_blur_to_series(
58109
images_mm_df: bpd.DataFrame,
59110
bq_connection: str,

0 commit comments

Comments
 (0)