Skip to content

Commit f76c597

Browse files
committed
fix: scan
1 parent d3db165 commit f76c597

5 files changed

Lines changed: 413 additions & 113 deletions

File tree

api/services/card_image_enhance.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""
2+
Light, conservative image cleanup applied *just before* the Groq OCR call.
3+
4+
The phone already ships a perspective-deskewed card (OpenCV warp in the Web
5+
Worker), but mobile cameras still send slightly soft, contrasty-or-flat frames
6+
depending on flash / lighting. A small auto-contrast + unsharp mask pass
7+
recovers a noticeable amount of OCR accuracy on the marginal frames without
8+
ever distorting an already-clean one.
9+
10+
Pure Pillow + stdlib — no numpy, no new dependency.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
import io
16+
import logging
17+
18+
from PIL import Image, ImageFilter, ImageOps
19+
20+
logger = logging.getLogger(__name__)
21+
22+
#: Upper bound, in pixels, for the longer side of the image handed to OCR.
#: Groq vision resizes on its side anyway, so shrinking up front trims upload
#: and decode time while card text stays legible.
_MAX_EDGE: int = 1280

#: Histogram clip passed to auto-contrast as its ``cutoff``; kept small so the
#: highlights and shadows that matter are preserved.
_AUTOCONTRAST_CUTOFF: int = 1

#: Unsharp-mask settings, tuned for printed text on small-edge images.
_USM_RADIUS: float = 1.4
_USM_PERCENT: int = 140
_USM_THRESHOLD: int = 3
31+
32+
33+
def enhance_for_ocr(image_bytes: bytes) -> bytes:
    """
    Produce a mildly contrast-stretched, sharpened JPEG for the OCR step.

    The image is decoded, rotated according to its EXIF orientation, coerced
    to RGB unless it is already RGB or greyscale, shrunk so its longer side
    does not exceed ``_MAX_EDGE``, then run through auto-contrast and an
    unsharp mask before being re-encoded as JPEG.

    Args:
        image_bytes: Encoded image data in any format Pillow can open.

    Returns:
        The processed image as JPEG bytes. On any decode or processing
        failure the input bytes are returned unchanged (fail-open).
    """
    try:
        picture = ImageOps.exif_transpose(Image.open(io.BytesIO(image_bytes)))
        if picture.mode != "RGB" and picture.mode != "L":
            picture = picture.convert("RGB")

        width, height = picture.size
        longest = width if width >= height else height
        if longest > _MAX_EDGE:
            # Downscale only; both sides share one factor and never drop
            # below a single pixel.
            ratio = _MAX_EDGE / float(longest)
            target_size = (
                max(1, int(width * ratio)),
                max(1, int(height * ratio)),
            )
            picture = picture.resize(target_size, Image.Resampling.LANCZOS)

        picture = ImageOps.autocontrast(picture, cutoff=_AUTOCONTRAST_CUTOFF)
        sharpen = ImageFilter.UnsharpMask(
            radius=_USM_RADIUS,
            percent=_USM_PERCENT,
            threshold=_USM_THRESHOLD,
        )
        picture = picture.filter(sharpen)

        encoded = io.BytesIO()
        picture.save(encoded, "JPEG", quality=92, optimize=True)
        return encoded.getvalue()
    except Exception as exc:  # noqa: BLE001 - cleanup must never break the pipeline
        # Best-effort step: hand the untouched input back to the caller.
        logger.debug("enhance_for_ocr fail-open: %s", exc)
        return image_bytes

api/services/scan_stream_service.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from core.database import SessionLocal
2828
from models.collection_card import CollectionCard
2929
from services import collection_card_service
30+
from services.card_image_enhance import enhance_for_ocr
3031
from services.card_image_gate import assess_card_image
3132
from services.collection_card_lookup_service import fetch_card_for_collection
3233
from services.ocr_service import extract_card_from_bytes
@@ -187,6 +188,10 @@ async def _process_scan(
187188
image_preview_data_url=preview,
188189
),
189190
)
191+
# Auto-contrast + light sharpening on the deskewed card recovers OCR
192+
# accuracy on soft / flat / over-lit shots without ever degrading a
193+
# clean one. Runs in the executor so we don't block the loop.
194+
ocr_bytes = await loop.run_in_executor(None, lambda: enhance_for_ocr(image_bytes))
190195
try:
191196
# PokéWallet enrichment is *on* here even though we skip pricing
192197
# later: it gives us a reliable English Pokémon name for JA prints
@@ -195,7 +200,7 @@ async def _process_scan(
195200
ocr_result = await loop.run_in_executor(
196201
None,
197202
lambda: extract_card_from_bytes(
198-
image_bytes,
203+
ocr_bytes,
199204
filename,
200205
enrich_from_pokewallet=True,
201206
user_hint=user_hint,

0 commit comments

Comments
 (0)