Skip to content

Commit 6047ea3

Browse files
committed
fix: scan
1 parent b84cd42 commit 6047ea3

2 files changed

Lines changed: 174 additions & 2 deletions

File tree

api/services/card_image_gate.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""
2+
Cheap pre-check run *before* any (paid) Groq vision call: does this frame
3+
actually contain a sharp, framed trading card?
4+
5+
A phone camera in "cash register" mode streams many frames; without this gate
6+
an empty desk, a blurry pan or a finger would each cost a Groq request and
7+
trip 429s. Pure Pillow + stdlib — no numpy / OpenCV dependency.
8+
9+
Heuristics (all must pass):
10+
11+
* **detail** — luminance stddev. An empty/uniform surface is flat.
12+
* **focus** — edge-energy spread (variance-of-Laplacian proxy). A blurry
13+
or motion-smeared frame has little high-frequency content.
14+
* **framing** — the "busy" region (thresholded edges) must fill a large,
15+
roughly centered, card-proportioned part of the frame, i.e. an actual card
16+
presented to the lens rather than clutter in a corner.
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import io
22+
import logging
23+
from dataclasses import dataclass
24+
25+
from PIL import Image, ImageFilter, ImageOps, ImageStat
26+
27+
logger = logging.getLogger(__name__)
28+
29+
#: Long edge the frame is downscaled to before analysis (speed).
30+
_ANALYZE_EDGE = 256
31+
#: Edge-map binarisation threshold (0–255) for the "busy region" mask.
32+
_EDGE_BIN_THRESHOLD = 38
33+
34+
# --- Acceptance thresholds (strict: better to skip a frame than to spam Groq;
35+
# a real card held in frame produces several qualifying frames anyway).
36+
_MIN_DETAIL_STDDEV = 17.0
37+
_MIN_FOCUS = 9.0
38+
_MIN_FILL = 0.42
39+
_CENTER_LO = 0.22
40+
_CENTER_HI = 0.78
41+
_CARD_AR_MIN = 0.45
42+
_CARD_AR_MAX = 2.20
43+
44+
45+
@dataclass(frozen=True)
class CardGateResult:
    """Immutable verdict returned by :func:`assess_card_image`.

    The three measurements accompany every verdict so a caller can log why a
    frame was dropped; they are all ``0.0`` when the frame could not be
    analysed at all (``unreadable`` / ``too_small``).
    """

    #: ``True`` only when the frame cleared every heuristic.
    is_card: bool
    #: One of ``ok`` | ``unreadable`` | ``too_small`` | ``empty`` | ``blurry`` | ``no_card``.
    reason: str
    #: Luminance standard deviation of the analysed frame.
    detail: float
    #: Standard deviation of the edge map (sharpness proxy).
    focus: float
    #: Share of the frame covered by the bounding box of strong edges.
    fill: float
55+
56+
57+
def _bbox_metrics(edges: Image.Image, w: int, h: int) -> tuple[float, bool, bool]:
    """Return ``(fill_ratio, centered, aspect_ok)`` for the thresholded edge map.

    ``edges`` is expected to be the ``FIND_EDGES`` output of a ``w`` x ``h``
    greyscale frame. The map is binarised at ``_EDGE_BIN_THRESHOLD`` and the
    bounding box of the surviving ("busy") pixels is measured:

    * ``fill_ratio`` — bounding-box area divided by the frame area.
    * ``centered`` — the box centre lies within ``_CENTER_LO``..``_CENTER_HI``
      of the frame on both axes.
    * ``aspect_ok`` — box width / height lies within
      ``_CARD_AR_MIN``..``_CARD_AR_MAX``.

    ``(0.0, False, False)`` is returned when no pixel exceeds the threshold.

    The outermost 1 px ring of ``edges`` is ignored. Pillow's 3x3 kernel
    filters do not compute a response there; they copy the source pixels
    through unchanged. Left in, any frame whose border is brighter than the
    threshold would produce a bounding box spanning the whole frame (fill
    ~1.0, perfectly centred) and the framing check would pass regardless of
    content.

    NOTE(review): thresholds such as ``_MIN_FILL`` may have been tuned with
    the border ring included — re-validate them on real frames.
    """
    if w < 3 or h < 3:
        # Nothing is left once the border ring is discarded.
        return 0.0, False, False

    lut = [0 if level <= _EDGE_BIN_THRESHOLD else 255 for level in range(256)]
    interior = edges.crop((1, 1, w - 1, h - 1))
    box = interior.point(lut).getbbox()
    if not box:
        return 0.0, False, False

    # Undo the 1 px crop offset so the box is in full-frame coordinates.
    left, top, right, bottom = (edge + 1 for edge in box)
    box_w = max(1, right - left)
    box_h = max(1, bottom - top)

    fill = (box_w * box_h) / float(w * h)
    center_x = (left + right) / 2.0 / w
    center_y = (top + bottom) / 2.0 / h
    centered = (
        _CENTER_LO <= center_x <= _CENTER_HI
        and _CENTER_LO <= center_y <= _CENTER_HI
    )
    aspect_ok = _CARD_AR_MIN <= box_w / box_h <= _CARD_AR_MAX
    return fill, centered, aspect_ok
73+
74+
75+
def assess_card_image(image_bytes: bytes) -> CardGateResult:
    """
    Decide whether ``image_bytes`` looks like a card worth sending to OCR.

    The frame is decoded, EXIF-rotated, converted to greyscale and shrunk to
    at most ``_ANALYZE_EDGE`` px on its long side. Three checks then run in
    order, and the first failure decides the ``reason``:

    * ``empty`` — luminance stddev below ``_MIN_DETAIL_STDDEV``.
    * ``blurry`` — edge-map stddev below ``_MIN_FOCUS``.
    * ``no_card`` — busy region too small, off-centre or not card-shaped.

    Frames that cannot be decoded yield ``unreadable``; frames smaller than
    48 px on either side after downscaling yield ``too_small``. Both carry
    zeroed measurements.

    Decode errors are never propagated — a malformed upload is reported as a
    non-card so the caller simply skips the frame (no Groq call).
    """
    try:
        img = Image.open(io.BytesIO(image_bytes))
        img = ImageOps.exif_transpose(img).convert("L")
    except Exception:  # noqa: BLE001 - any malformed upload is just "not a card"
        # Still a silent rejection for the caller; keep a trace for debugging.
        logger.debug("card gate: frame could not be decoded", exc_info=True)
        return CardGateResult(False, "unreadable", 0.0, 0.0, 0.0)

    # thumbnail() only ever shrinks, so tiny uploads stay tiny and are
    # rejected by the size check below.
    img.thumbnail((_ANALYZE_EDGE, _ANALYZE_EDGE))
    w, h = img.size
    if w < 48 or h < 48:
        return CardGateResult(False, "too_small", 0.0, 0.0, 0.0)

    detail = float(ImageStat.Stat(img).stddev[0])
    edges = img.filter(ImageFilter.FIND_EDGES)
    # Pillow's 3x3 kernel filters copy the outermost pixel ring straight from
    # the source image instead of computing an edge response there. Those raw
    # luminance values would inflate the spread and let a bright but blurry
    # frame clear ``_MIN_FOCUS``, so focus is measured on the interior only.
    # NOTE(review): ``_MIN_FOCUS`` may have been tuned with the ring included.
    focus = float(ImageStat.Stat(edges.crop((1, 1, w - 1, h - 1))).stddev[0])
    fill, centered, aspect_ok = _bbox_metrics(edges, w, h)

    if detail < _MIN_DETAIL_STDDEV:
        return CardGateResult(False, "empty", detail, focus, fill)
    if focus < _MIN_FOCUS:
        return CardGateResult(False, "blurry", detail, focus, fill)
    if fill < _MIN_FILL or not centered or not aspect_ok:
        return CardGateResult(False, "no_card", detail, focus, fill)
    return CardGateResult(True, "ok", detail, focus, fill)

api/services/scan_stream_service.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from core.database import SessionLocal
2828
from models.collection_card import CollectionCard
2929
from services import collection_card_service
30+
from services.card_image_gate import assess_card_image
3031
from services.collection_card_lookup_service import fetch_card_for_collection
3132
from services.ocr_service import extract_card_from_bytes
3233
from services.scan_service import detect_physical_language_from_ocr
@@ -40,6 +41,14 @@
4041
_GROQ_PARALLELISM = 4
4142
_groq_sem = asyncio.Semaphore(_GROQ_PARALLELISM)
4243

44+
#: Per-user guards so a "cash register" frame stream becomes *one* Groq call
45+
#: per physical card instead of a burst (the source of the 429s).
46+
#: - ``_inflight``: a scan is already being processed for this user.
47+
#: - ``_last_accept``: epoch of the last frame that passed the gate.
48+
_MIN_OCR_INTERVAL_SEC = 2.5
49+
_inflight: set[int] = set()
50+
_last_accept: dict[int, float] = {}
51+
4352

4453
def _now_iso() -> float:
4554
return time.time()
@@ -139,7 +148,7 @@ def _add_or_increment(
139148
db.close()
140149

141150

142-
async def _run_scan_pipeline(
151+
async def _process_scan(
143152
*,
144153
event_id: str,
145154
user_id: int,
@@ -149,7 +158,7 @@ async def _run_scan_pipeline(
149158
physical_language: str,
150159
user_hint: str | None,
151160
) -> None:
152-
"""End-to-end processing for a single scan event."""
161+
"""End-to-end processing for a single scan event (gate already passed)."""
153162
hub = get_scan_stream_hub()
154163

155164
preview = _short_preview_data_url(image_bytes, mime)
@@ -330,6 +339,65 @@ async def _run_scan_pipeline(
330339
)
331340

332341

342+
async def _run_scan_pipeline(
    *,
    event_id: str,
    user_id: int,
    image_bytes: bytes,
    filename: str,
    mime: str,
    physical_language: str,
    user_hint: str | None,
) -> None:
    """
    Pre-Groq gate. A streaming camera sends many frames; we only let one
    through per real card:

    * **in-flight guard** — a frame is already being gated or processed for
      this user → drop.
    * **debounce** — last accepted frame < ``_MIN_OCR_INTERVAL_SEC`` ago → drop.
    * **card pre-detection** — empty / blurry / not-a-card → drop.

    Rejections are silent (no websocket event), so the live feed only shows
    real cards — like the existing phone card-scanner apps. No Groq call is
    made unless a frame clears every check, which is what stops the 429s.

    The per-user slot is claimed *before* the first ``await``. Claiming it
    only after the image gate returned (as a check-then-act sequence across
    an ``await``) would let several frames of the same user pass the guards
    while the first one is still being assessed, and every one of them would
    then reach Groq — the very burst this function exists to prevent.

    NOTE(review): this relies on all pipeline coroutines running on a single
    event loop; the module-level guard containers are not protected against
    concurrent access from multiple threads/loops — confirm against the
    caller.

    All parameters are forwarded unchanged to ``_process_scan`` once the
    frame is accepted. Returns ``None`` in every case.
    """
    now = time.time()
    if user_id in _inflight:
        logger.debug("scan-stream: drop (in-flight) user=%s", user_id)
        return
    if now - _last_accept.get(user_id, 0.0) < _MIN_OCR_INTERVAL_SEC:
        logger.debug("scan-stream: drop (debounce) user=%s", user_id)
        return

    # No await has happened since the checks above, so check + claim is
    # atomic with respect to other coroutines on this loop.
    _inflight.add(user_id)
    try:
        loop = asyncio.get_running_loop()
        # The Pillow analysis is CPU-bound; keep it off the event loop.
        gate = await loop.run_in_executor(None, assess_card_image, image_bytes)
        if not gate.is_card:
            logger.debug(
                "scan-stream: drop (%s) user=%s detail=%.1f focus=%.1f fill=%.2f",
                gate.reason,
                user_id,
                gate.detail,
                gate.focus,
                gate.fill,
            )
            return

        # Only accepted frames move the debounce window.
        _last_accept[user_id] = now
        await _process_scan(
            event_id=event_id,
            user_id=user_id,
            image_bytes=image_bytes,
            filename=filename,
            mime=mime,
            physical_language=physical_language,
            user_hint=user_hint,
        )
    finally:
        # Release the slot on every exit path: rejection, success or error.
        _inflight.discard(user_id)
399+
400+
333401
def submit_scan(
334402
*,
335403
user_id: int,

0 commit comments

Comments
 (0)