Skip to content

Commit 941bd0e

Browse files
Merge branch 'fix/more-magick' into feature/e2e-func-tests
2 parents bd2d3c4 + fb07a25 commit 941bd0e

18 files changed

Lines changed: 1145 additions & 590 deletions

.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ OAUTH_AUDIENCE=crisp-athena-live
66

77
# Athena server configuration
88
# ATHENA_HOST=trust-messages.crispthinking.com
9-
ATHENA_AFFILIATE=athena-test
9+
ATHENA_AFFILIATE=athena-test

.github/workflows/ci.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131

3232
steps:
3333
- name: Checkout code
34-
uses: actions/checkout@v5
34+
uses: actions/checkout@v6
3535

3636
- name: Initialize submodules
3737
env:
@@ -42,7 +42,7 @@ jobs:
4242
git submodule update --init --recursive
4343
4444
- name: Install uv
45-
uses: astral-sh/setup-uv@v6
45+
uses: astral-sh/setup-uv@v7
4646
with:
4747
enable-cache: true
4848

@@ -81,13 +81,13 @@ jobs:
8181
runs-on: ubuntu-latest
8282

8383
steps:
84-
- uses: actions/checkout@v5
84+
- uses: actions/checkout@v6
8585

8686
- name: Initialize submodules
8787
run: git submodule update --init --recursive
8888

8989
- name: Install uv
90-
uses: astral-sh/setup-uv@v6
90+
uses: astral-sh/setup-uv@v7
9191
with:
9292
enable-cache: true
9393

@@ -118,7 +118,7 @@ jobs:
118118
run: uv build
119119

120120
- name: Upload build artifacts
121-
uses: actions/upload-artifact@v4
121+
uses: actions/upload-artifact@v5
122122
with:
123123
name: dist
124124
path: dist/
@@ -134,15 +134,15 @@ jobs:
134134
id-token: write
135135

136136
steps:
137-
- uses: actions/checkout@v5
137+
- uses: actions/checkout@v6
138138

139139
- name: Install uv
140-
uses: astral-sh/setup-uv@v6
140+
uses: astral-sh/setup-uv@v7
141141
with:
142142
enable-cache: true
143143

144144
- name: Download build artifacts
145-
uses: actions/download-artifact@v5
145+
uses: actions/download-artifact@v6
146146
with:
147147
name: dist
148148
path: dist/

.github/workflows/docs.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ jobs:
4242
runs-on: ubuntu-latest
4343
steps:
4444
- name: Checkout
45-
uses: actions/checkout@v5
45+
uses: actions/checkout@v6
4646
with:
4747
fetch-depth: 0
4848

4949
- name: Install uv
50-
uses: astral-sh/setup-uv@v6
50+
uses: astral-sh/setup-uv@v7
5151
with:
5252
enable-cache: true
5353

@@ -82,14 +82,14 @@ jobs:
8282
path: docs/_build/html
8383

8484
- name: Upload documentation artifacts
85-
uses: actions/upload-artifact@v4
85+
uses: actions/upload-artifact@v5
8686
if: github.event_name != 'release'
8787
with:
8888
name: documentation
8989
path: docs/_build/html
9090

9191
- name: Upload build artifacts for debugging
92-
uses: actions/upload-artifact@v4
92+
uses: actions/upload-artifact@v5
9393
if: failure()
9494
with:
9595
name: docs-build-artifacts

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ repos:
1313
hooks:
1414
- id: basedpyright
1515
name: basedpyright
16-
entry: basedpyright
16+
entry: uv run basedpyright
1717
language: system
1818
types_or: [python, pyi]
1919
pass_filenames: false

src/resolver_athena_client/client/athena_client.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,12 +239,18 @@ async def classify_single(
239239
else RequestEncoding.REQUEST_ENCODING_UNCOMPRESSED
240240
)
241241

242+
# Ensure we never send UNSPECIFIED format over the API
243+
# If format is still UNSPECIFIED, default to RAW_UINT8_BGR
244+
image_format = processed_image.image_format
245+
if image_format == ImageFormat.IMAGE_FORMAT_UNSPECIFIED:
246+
image_format = ImageFormat.IMAGE_FORMAT_RAW_UINT8_BGR
247+
242248
classification_input = ClassificationInput(
243249
affiliate=self.options.affiliate,
244250
correlation_id=correlation_id,
245251
encoding=request_encoding,
246252
data=processed_image.data,
247-
format=ImageFormat.IMAGE_FORMAT_RAW_UINT8,
253+
format=image_format,
248254
hashes=[
249255
ImageHash(
250256
value=hash_value,
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""Utility for detecting image formats from raw bytes."""
2+
3+
from resolver_athena_client.generated.athena.models_pb2 import ImageFormat
4+
5+
PNG_MAGIC_BYTES = b"\x89PNG"
6+
JPEG_MAGIC_BYTES = b"\xff\xd8\xff"
7+
GIF87A_MAGIC_BYTES = b"GIF87a"
8+
GIF89A_MAGIC_BYTES = b"GIF89a"
9+
BMP_MAGIC_BYTES = b"BM"
10+
WEBP_RIFF_MAGIC_BYTES = b"RIFF"
11+
WEBP_WEBP_MAGIC_BYTES = b"WEBP"
12+
TIFF_LE_MAGIC_BYTES = b"II*\x00"
13+
TIFF_BE_MAGIC_BYTES = b"MM\x00*"
14+
15+
16+
def detect_image_format(data: bytes) -> ImageFormat.ValueType: # noqa: PLR0911
17+
"""Detect image format from raw bytes using magic number signatures.
18+
19+
Args:
20+
----
21+
data: Raw image bytes to analyze
22+
23+
Returns:
24+
-------
25+
ImageFormat enum value representing the detected format
26+
27+
"""
28+
if not data:
29+
return ImageFormat.IMAGE_FORMAT_UNSPECIFIED
30+
31+
# Check magic numbers for common image formats
32+
# PNG: starts with PNG_MAGIC_BYTES
33+
png_len = len(PNG_MAGIC_BYTES)
34+
if len(data) >= png_len and data[:png_len] == PNG_MAGIC_BYTES:
35+
return ImageFormat.IMAGE_FORMAT_PNG
36+
37+
# JPEG: starts with JPEG_MAGIC_BYTES
38+
jpeg_len = len(JPEG_MAGIC_BYTES)
39+
if len(data) >= jpeg_len and data[:jpeg_len] == JPEG_MAGIC_BYTES:
40+
return ImageFormat.IMAGE_FORMAT_JPEG
41+
42+
# GIF: starts with GIF87A_MAGIC_BYTES or GIF89A_MAGIC_BYTES
43+
gif_len = len(GIF87A_MAGIC_BYTES)
44+
if len(data) >= gif_len and data[:gif_len] in (
45+
GIF87A_MAGIC_BYTES,
46+
GIF89A_MAGIC_BYTES,
47+
):
48+
return ImageFormat.IMAGE_FORMAT_GIF
49+
50+
# BMP: starts with BMP_MAGIC_BYTES
51+
bmp_len = len(BMP_MAGIC_BYTES)
52+
if len(data) >= bmp_len and data[:bmp_len] == BMP_MAGIC_BYTES:
53+
return ImageFormat.IMAGE_FORMAT_BMP
54+
55+
# WebP: RIFF....WEBP (12 bytes minimum for full signature)
56+
webp_min_len = len(WEBP_RIFF_MAGIC_BYTES) + len(WEBP_WEBP_MAGIC_BYTES) + 4
57+
if (
58+
len(data) >= webp_min_len
59+
and data[:4] == WEBP_RIFF_MAGIC_BYTES
60+
and data[8:12] == WEBP_WEBP_MAGIC_BYTES
61+
):
62+
return ImageFormat.IMAGE_FORMAT_WEBP
63+
64+
# TIFF: little-endian or big-endian magic bytes
65+
tiff_len = len(TIFF_LE_MAGIC_BYTES)
66+
if len(data) >= tiff_len and (
67+
data[:tiff_len] == TIFF_LE_MAGIC_BYTES
68+
or data[:tiff_len] == TIFF_BE_MAGIC_BYTES
69+
):
70+
return ImageFormat.IMAGE_FORMAT_TIFF
71+
72+
# Fallback when format cannot be determined
73+
return ImageFormat.IMAGE_FORMAT_UNSPECIFIED

src/resolver_athena_client/client/models/input_model.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@
66
"""
77

88
import hashlib
9+
from typing import TYPE_CHECKING
10+
11+
from resolver_athena_client.client.image_format_detector import (
12+
detect_image_format,
13+
)
14+
15+
if TYPE_CHECKING:
16+
from resolver_athena_client.generated.athena.models_pb2 import ImageFormat
917

1018

1119
class ImageData:
@@ -24,6 +32,8 @@ class ImageData:
2432
Attributes:
2533
----------
2634
data: The raw bytes of the image (modified in-place by transformers).
35+
image_format: The format of the image data (e.g., JPEG, PNG, RAW_UINT8).
36+
Updated by transformers when they change the format.
2737
sha256_hashes: List of SHA256 hashes tracking image transformations.
2838
Index 0 is the original image, subsequent indices track
2939
transformations.
@@ -66,6 +76,9 @@ def __init__(self, image_bytes: bytes) -> None:
6676
6777
"""
6878
self.data: bytes = image_bytes
79+
self.image_format: ImageFormat.ValueType = detect_image_format(
80+
image_bytes
81+
)
6982
self.sha256_hashes: list[str] = [
7083
hashlib.sha256(image_bytes).hexdigest()
7184
]

src/resolver_athena_client/client/transformers/classification_input.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,20 @@ def __init__(
4848
def _create_classification_input(
4949
self, image_data: ImageData
5050
) -> ClassificationInput:
51-
# Get image format and data
51+
# Ensure we never send UNSPECIFIED format over the API
52+
# If format is still UNSPECIFIED, default to RAW_UINT8_BGR
53+
image_format = image_data.image_format
54+
if image_format == ImageFormat.IMAGE_FORMAT_UNSPECIFIED:
55+
image_format = ImageFormat.IMAGE_FORMAT_RAW_UINT8_BGR
56+
5257
return ClassificationInput(
5358
affiliate=self.affiliate,
5459
correlation_id=self.correlation_provider.get_correlation_id(
5560
image_data.data
5661
),
5762
data=image_data.data,
5863
encoding=self.request_encoding,
59-
format=ImageFormat.IMAGE_FORMAT_RAW_UINT8,
64+
format=image_format,
6065
)
6166

6267
@override

src/resolver_athena_client/client/transformers/core.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,15 @@
1313

1414
from resolver_athena_client.client.consts import EXPECTED_HEIGHT, EXPECTED_WIDTH
1515
from resolver_athena_client.client.models import ImageData
16+
from resolver_athena_client.generated.athena.models_pb2 import ImageFormat
1617

1718
# Global optimization constants
1819
_target_size = (EXPECTED_WIDTH, EXPECTED_HEIGHT)
1920
_expected_raw_size = EXPECTED_WIDTH * EXPECTED_HEIGHT * 3
2021

2122

22-
def _is_raw_brg_expected_size(data: bytes) -> bool:
23-
"""Detect if data is already a raw BRG array of expected size."""
23+
def _is_raw_bgr_expected_size(data: bytes) -> bool:
24+
"""Detect if data is already a raw BGR array of expected size."""
2425
return len(data) == _expected_raw_size
2526

2627

@@ -37,7 +38,7 @@ async def resize_image(image_data: ImageData) -> ImageData:
3738

3839
def process_image() -> tuple[bytes, bool]:
3940
# Fast path for raw BGR arrays of correct size
40-
if _is_raw_brg_expected_size(image_data.data):
41+
if _is_raw_bgr_expected_size(image_data.data):
4142
return image_data.data, False # No transformation needed
4243

4344
# Try to load the image data directly
@@ -57,22 +58,23 @@ def process_image() -> tuple[bytes, bool]:
5758

5859
rgb_bytes = resized_image.tobytes()
5960

60-
# Convert RGB to BRG by swapping channels
61-
brg_bytes = bytearray(len(rgb_bytes))
61+
# Convert RGB to BGR by swapping channels
62+
bgr_bytes = bytearray(len(rgb_bytes))
6263

6364
for i in range(0, len(rgb_bytes), 3):
64-
brg_bytes[i] = rgb_bytes[i + 2]
65-
brg_bytes[i + 1] = rgb_bytes[i]
66-
brg_bytes[i + 2] = rgb_bytes[i + 1]
65+
bgr_bytes[i] = rgb_bytes[i + 2]
66+
bgr_bytes[i + 1] = rgb_bytes[i + 1]
67+
bgr_bytes[i + 2] = rgb_bytes[i]
6768

68-
return bytes(brg_bytes), True # Data was transformed
69+
return bytes(bgr_bytes), True # Data was transformed
6970

7071
# Use thread pool for CPU-intensive processing
7172
resized_bytes, was_transformed = await asyncio.to_thread(process_image)
7273

7374
# Only modify data and add hashes if transformation occurred
7475
if was_transformed:
7576
image_data.data = resized_bytes
77+
image_data.image_format = ImageFormat.IMAGE_FORMAT_RAW_UINT8_BGR
7678
image_data.add_transformation_hashes()
7779

7880
return image_data

0 commit comments

Comments
 (0)