diff --git a/.gitignore b/.gitignore
index 37350de..7e06bc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,7 @@ api/.env
web/.env
web/goupixdex.key
web/goupixdex.key.pub
-integrations
\ No newline at end of file
+integrations
+
+# Local Cursor / agent guidelines — not versioned
+CLAUDE.md
\ No newline at end of file
diff --git a/api/.env.example b/api/.env.example
index 4f4344d..a7ef712 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -27,3 +27,7 @@ CORS_ORIGINS=*
# EBAY_CLIENT_SECRET=
# EBAY_REDIRECT_URI=https://votre-frontend/settings/marketplaces
# EBAY_USE_SANDBOX=true
+# Optional HTTP proxy for the « vendus » (sold) HTML scrape (use it if eBay returns 403 to the server).
+# EBAY_SOLD_SCRAPE_PROXY=http://127.0.0.1:8888
+# Seconds between two « sold-scrape » requests per user (eBay anti-bot / burst traffic).
+# EBAY_SOLD_SCRAPE_MIN_INTERVAL_SECONDS=60
diff --git a/api/config.py b/api/config.py
index 1b5a014..1b30f53 100644
--- a/api/config.py
+++ b/api/config.py
@@ -79,6 +79,10 @@ class AppSettings(BaseSettings):
ebay_redirect_uri: str | None = None
#: Use sandbox API hosts (``auth.sandbox.ebay.com``, ``api.sandbox.ebay.com``).
ebay_use_sandbox: bool = True
+ #: Optional HTTP(S) proxy for fetching eBay « vendus » HTML (datacenter IPs are often blocked).
+ ebay_sold_scrape_proxy: str | None = None
+ #: Min seconds between two « sold-scrape » calls **per user** (limits burst traffic to eBay).
+ ebay_sold_scrape_min_interval_seconds: float = Field(default=60.0, ge=0, le=3600)
@lru_cache
diff --git a/api/requirements.txt b/api/requirements.txt
index a2554a2..e5b42fb 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -19,6 +19,9 @@ python-jose[cryptography]>=3.3.0
nodriver>=0.48.0
python-dotenv>=1.0.0
httpx>=0.27.0
+# curl_cffi: HTTP client with TLS/JA3 fingerprint impersonation (Chrome/Firefox).
+# Used by ebay_sold_scrape_service to bypass datacenter-IP 403s on ebay.fr.
+curl_cffi>=0.7.0
beautifulsoup4>=4.12.0
Pillow>=10.0.0
supabase>=2.0.0
diff --git a/api/routes/ebay_market_route.py b/api/routes/ebay_market_route.py
index c12a3aa..e7138d9 100644
--- a/api/routes/ebay_market_route.py
+++ b/api/routes/ebay_market_route.py
@@ -3,9 +3,10 @@
from __future__ import annotations
import logging
-from typing import Annotated
+from typing import Annotated, Any
from fastapi import APIRouter, Depends, HTTPException, Query, status
+from pydantic import BaseModel, Field
from app_types.ebay_browse import (
ConditionFilter,
@@ -19,6 +20,10 @@
from services.ebay_app_oauth_service import ebay_app_oauth_configured
from services.ebay_browse_service import DEFAULT_LIMIT, MAX_LIMIT, browse_search
from services.ebay_price_aggregator_service import aggregate_prices, partition_outliers
+from services.ebay_sold_scrape_rate_limit import acquire_sold_scrape_slot
+from services.ebay_sold_scrape_service import ebay_fr_sold_search_url, scrape_sold_listings
+from services.ebay_sold_top_service import aggregate_top_sold
+from services.ebay_sold_top_worker import get_job, peek_items_sample, submit_job
logger = logging.getLogger(__name__)
@@ -159,3 +164,149 @@ async def search_market(
"total_matches": total,
"warnings": warnings,
}
+
+
+@router.get("/sold-scrape", response_model=None)
+async def sold_scrape_html(
+    user: Annotated[User, Depends(get_current_user)],
+    q: Annotated[str, Query(min_length=2, max_length=256)],
+    window_hours: Annotated[float, Query(ge=1, le=720)] = 168,
+    limit: Annotated[int, Query(ge=1, le=60)] = 50,
+) -> dict[str, Any]:
+    """
+    **Completed listings** (sold) via **public eBay HTML search** — no Marketplace Insights OAuth.
+
+    May fail with bot protection (403); optional ``EBAY_SOLD_SCRAPE_PROXY`` in server env.
+    Rate-limited per user (default: one call every ``EBAY_SOLD_SCRAPE_MIN_INTERVAL_SECONDS``).
+    Window goes up to ``720`` hours (30 days).
+    """
+    settings = get_settings()
+    query = q.strip()
+    url_page_size = min(60, max(limit, 10))
+
+    def _payload(items: Any, error: Any, *, source: str, cached: bool) -> dict[str, Any]:
+        # Single place that shapes the response so both paths stay in sync.
+        return {
+            "query": query,
+            "window_hours": window_hours,
+            "items": items,
+            "error": error,
+            "ebay_sold_search_url": ebay_fr_sold_search_url(q=query, page_size=url_page_size),
+            "source": source,
+            "cached": cached,
+        }
+
+    # A fresh cached "top" result for the same (q, window) already holds an
+    # items sample: serving it avoids an eBay round-trip and does not consume
+    # the caller's rate-limit slot (useful when switching from Top to List mode).
+    cached_sample = peek_items_sample(q=query, window_hours=window_hours)
+    if cached_sample is not None:
+        return _payload(
+            cached_sample[:limit],
+            None,
+            source="ebay_html_scrape_cached_from_top",
+            cached=True,
+        )
+
+    min_interval = settings.ebay_sold_scrape_min_interval_seconds
+    retry_after = await acquire_sold_scrape_slot(user.id, min_interval)
+    if retry_after > 0:
+        raise HTTPException(
+            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+            detail=(
+                f"Rate limit: at most one eBay sold-search every {min_interval:g} s "
+                f"(retry in {retry_after} s)."
+            ),
+            headers={"Retry-After": str(retry_after)},
+        )
+
+    items, err = await scrape_sold_listings(
+        q=query,
+        window_hours=window_hours,
+        limit=limit,
+        app=settings,
+    )
+    return _payload(items, err, source="ebay_html_scrape", cached=False)
+
+
+class SoldTopSubmitBody(BaseModel):
+    """Body for ``POST /ebay/market/sold-top`` — schedules a background scrape."""
+
+    # Search keywords, 2 to 256 characters (the route strips them before use).
+    q: str = Field(min_length=2, max_length=256)
+    # Look-back window in hours: 1 to 720, default 168.
+    window_hours: float = Field(default=168, ge=1, le=720)
+    # Forwarded to ``submit_job`` as ``pages``: 1 to 20, default 10
+    # (presumably the number of result pages to scrape — confirm in the worker).
+    pages: int = Field(default=10, ge=1, le=20)
+    # Forwarded to ``submit_job`` as ``scrape_limit``: 10 to 1000, default 600.
+    scrape_limit: int = Field(default=600, ge=10, le=1000)
+    # Forwarded to ``submit_job`` as ``top_limit``: 1 to 100, default 20.
+    top_limit: int = Field(default=20, ge=1, le=100)
+    # Forwarded to ``submit_job`` as ``min_count``: 1 to 20, default 1.
+    min_count: int = Field(default=1, ge=1, le=20)
+
+
+@router.post("/sold-top", response_model=None, status_code=status.HTTP_202_ACCEPTED)
+async def sold_top_submit(
+    user: Annotated[User, Depends(get_current_user)],
+    body: SoldTopSubmitBody,
+) -> dict[str, Any]:
+    """
+    Submit a background top-sold scrape job and return its ``job_id``
+    (consumed via ``GET /ebay/market/sold-top/{job_id}``).
+
+    When a fresh cached result (TTL 15 min) exists for the same parameters,
+    the job comes back already in ``status="completed"`` with its
+    ``result`` populated — no eBay scrape triggered. The per-user rate-limit
+    only fires when an actual scrape is launched.
+    """
+    app = get_settings()
+    # NOTE(review): the job is submitted *before* the rate-limit slot is
+    # checked below. If ``submit_job`` starts the scrape immediately, a caller
+    # answered with 429 has still triggered a scrape — confirm against the
+    # worker and consider checking the cache / slot first.
+    job = submit_job(
+        user_id=user.id,
+        q=body.q.strip(),
+        window_hours=body.window_hours,
+        pages=body.pages,
+        scrape_limit=body.scrape_limit,
+        top_limit=body.top_limit,
+        min_count=body.min_count,
+        app=app,
+    )
+
+    # A job that is already completed with a result is treated as a cache hit
+    # and does not consume the caller's rate-limit slot.
+    cache_hit = job.status == "completed" and job.result is not None
+    if not cache_hit:
+        retry_after = await acquire_sold_scrape_slot(
+            user.id, app.ebay_sold_scrape_min_interval_seconds,
+        )
+        if retry_after > 0:
+            iv = app.ebay_sold_scrape_min_interval_seconds
+            raise HTTPException(
+                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+                detail=(
+                    f"Rate limit: at most one eBay sold-search every {iv:g} s "
+                    f"(retry in {retry_after} s)."
+                ),
+                headers={"Retry-After": str(retry_after)},
+            )
+
+    # Public job fields plus a browser-openable link to the same eBay search.
+    return {
+        **job.to_public(),
+        "ebay_sold_search_url": ebay_fr_sold_search_url(q=body.q.strip(), page_size=60),
+        "cached": cache_hit,
+    }
+
+
+@router.get("/sold-top/{job_id}", response_model=None)
+async def sold_top_status(
+    user: Annotated[User, Depends(get_current_user)],
+    job_id: str,
+) -> dict[str, Any]:
+    """
+    Return the current state of a ``sold-top`` job.
+
+    The client polls this endpoint while ``status`` is ``pending`` or
+    ``running``. Once ``completed`` (or ``failed``), ``result`` is populated
+    and polling can stop. A job may only be read by its creator.
+    """
+    job = get_job(job_id)
+
+    # Guard clauses: unknown job first, then ownership.
+    if job is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Unknown or expired job.",
+        )
+    if job.user_id != user.id:
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="This job does not belong to you.",
+        )
+
+    payload: dict[str, Any] = {**job.to_public()}
+    payload["ebay_sold_search_url"] = ebay_fr_sold_search_url(q=job.q, page_size=60)
+    return payload
diff --git a/api/scripts/debug_ebay_scrape.py b/api/scripts/debug_ebay_scrape.py
new file mode 100644
index 0000000..64d132e
--- /dev/null
+++ b/api/scripts/debug_ebay_scrape.py
@@ -0,0 +1,138 @@
+"""
+One-shot diagnostic for ``ebay_sold_scrape_service``.
+
+Fetches the same URL the production service uses, dumps the HTML to
+``/tmp/ebay-sold.html``, and reports how many elements match candidate
+selectors so we can pick the right one when eBay rotates its SRP layout.
+
+Run from the ``api/`` directory:
+
+ python -m scripts.debug_ebay_scrape "carte pokemon"
+"""
+
+from __future__ import annotations
+
+import asyncio
+import sys
+from pathlib import Path
+
+from bs4 import BeautifulSoup
+
+from services.ebay_sold_scrape_service import (
+ _parse_sold_rows,
+ fetch_sold_listings_html,
+)
+
+OUT = Path("/tmp/ebay-sold.html")
+
+#: Candidate selectors to probe. Order is informational; we report counts for all.
+_CANDIDATE_SELECTORS = (
+ "li.s-item",
+ "ul.srp-results > li",
+ ".srp-results .s-item",
+ ".srp-results .s-item__wrapper",
+ "li.s-item__pl-on-bottom",
+ "[data-testid='srp-results'] li",
+ "[data-view*='mi:1686'] li",
+ "ul.b-list__items_nofooter li",
+ "li[data-viewport]",
+ "div.s-card",
+)
+
+#: Selectors that often hold the relative/absolute « sold » caption.
+_CAPTION_SELECTORS = (
+ ".s-item__caption--signal",
+ ".s-item__title--tagblock",
+ ".s-item__subtitle",
+ ".s-card__caption",
+ "[class*='caption']",
+)
+
+
+async def main(q: str) -> None:
+    """
+    Fetch the sold-listings page for ``q`` and print a selector diagnostic.
+
+    The HTML is saved to ``OUT``; the report then shows the page title and
+    first heading, a consent-page heuristic, match counts for each candidate
+    row selector, the output of the production parser, and sample captions /
+    field matches for the first result rows.
+    """
+    html = await fetch_sold_listings_html(q=q, page_size=50)
+    OUT.write_text(html, encoding="utf-8")
+    print(f"saved html ({len(html)} bytes) → {OUT}")
+
+    soup = BeautifulSoup(html, "html.parser")
+
+    # The labels were missing and the first literal was split across two
+    # lines, which is a syntax error; both prints are now single-line.
+    title = soup.select_one("title")
+    print(f"title: {title.get_text(strip=True) if title else '(none)'}")
+
+    h1 = soup.select_one("h1")
+    print(f"h1: {h1.get_text(' ', strip=True)[:120] if h1 else '(none)'}")
+
+    # Quick consent-page heuristic
+    consent_markers = ("consent", "consentement", "accepter", "vos choix")
+    head_excerpt = html[:4000].lower()
+    if any(tok in head_excerpt for tok in consent_markers):
+        print("⚠️ consent-related token found in first 4 KB — possible CMP page")
+
+    print("\n-- selector probe --")
+    for sel in _CANDIDATE_SELECTORS:
+        try:
+            n = len(soup.select(sel))
+        except Exception as exc:  # invalid selector etc.
+            n = f"ERR({exc})"
+        print(f" {sel:55s} → {n}")
+
+    print("\n-- existing parser --")
+    rows = _parse_sold_rows(html)
+    print(f" _parse_sold_rows: {len(rows)} rows")
+    for r in rows[:3]:
+        print(f" title={r.title[:60]!r} caption={r.sold_caption!r} hours_ago={r.approx_hours_ago}")
+
+    # If selector probe found something useful, sample captions
+    print("\n-- sample captions from first li.s-item or fallback --")
+    sample_lis = soup.select("li.s-item") or soup.select("li.s-item__pl-on-bottom") or soup.select("ul.srp-results > li")
+    for i, li in enumerate(sample_lis[:5]):
+        for csel in _CAPTION_SELECTORS:
+            cap = li.select_one(csel)
+            if cap:
+                print(f" li#{i} via {csel}: {cap.get_text(' ', strip=True)[:120]!r}")
+                break
+        else:
+            print(f" li#{i} (no caption matched any selector)")
+
+    # Field probes on the first 2 LIs so we can pin down the new s-card selectors
+    field_probes: dict[str, tuple[str, ...]] = {
+        "title": (
+            ".s-card__title", ".s-card__title-link",
+            "[role='heading']", "[role=heading]",
+            "a .su-styled-text", ".s-item__title", ".s-item__title span",
+        ),
+        "price": (".s-card__price", ".s-item__price", "[class*='price']"),
+        "link": ("a.su-link", "a[href*='/itm/']", "a.s-item__link"),
+        "image": (
+            ".s-card__image img", ".s-card__image-wrapper img",
+            "img.s-item__image-img", ".image-treatment img", "img",
+        ),
+        "caption": _CAPTION_SELECTORS,
+    }
+
+    print("\n-- field selector probe (first 2 LIs) --")
+    for i, li in enumerate(sample_lis[:2]):
+        print(f"\n[li #{i}]")
+        for field, sels in field_probes.items():
+            # Report only the first selector that matches for each field.
+            for s in sels:
+                el = li.select_one(s)
+                if not el:
+                    continue
+                if field == "link":
+                    snippet = (el.get("href") or "")[:120]
+                elif field == "image":
+                    snippet = (el.get("src") or el.get("data-src") or "")[:120]
+                else:
+                    snippet = el.get_text(" ", strip=True)[:120]
+                print(f" {field:7s} via {s:35s} → {snippet!r}")
+                break
+            else:
+                print(f" {field:7s} no match")
+        # Also dump the start of the LI's outer HTML (300 chars) so we can see attributes
+        outer = str(li)[:300].replace("\n", " ")
+        print(f" outer[:300]: {outer}")
+
+
+if __name__ == "__main__":
+ query = sys.argv[1] if len(sys.argv) > 1 else "carte pokemon"
+ asyncio.run(main(query))
diff --git a/api/services/ebay_app_oauth_service.py b/api/services/ebay_app_oauth_service.py
index ef551b8..acf549e 100644
--- a/api/services/ebay_app_oauth_service.py
+++ b/api/services/ebay_app_oauth_service.py
@@ -55,11 +55,12 @@ async def _request_app_token(app: AppSettings) -> dict[str, Any]:
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(_token_url(app), data=data, headers=headers)
if resp.status_code >= 400:
- logger.warning(
- "eBay app token request failed: %s %s",
- resp.status_code,
- resp.text[:500],
- )
+ if resp.status_code >= 500:
+ logger.warning(
+ "eBay app token request failed: %s %s",
+ resp.status_code,
+ resp.text[:500],
+ )
resp.raise_for_status()
return resp.json()
diff --git a/api/services/ebay_sold_scrape_rate_limit.py b/api/services/ebay_sold_scrape_rate_limit.py
new file mode 100644
index 0000000..8ac8376
--- /dev/null
+++ b/api/services/ebay_sold_scrape_rate_limit.py
@@ -0,0 +1,27 @@
+"""In-memory per-user rate limit for eBay « vendus » HTML scrape (reduces bot flags on eBay)."""
+
+from __future__ import annotations
+
+import asyncio
+import time
+
+_lock = asyncio.Lock()
+_last: dict[int, float] = {}
+
+
+async def acquire_sold_scrape_slot(user_id: int, min_interval_sec: float) -> int:
+    """
+    Enforce at most one allowed request per ``min_interval_sec`` per user (monotonic clock).
+
+    :param user_id: key under which the last accepted call is remembered.
+    :param min_interval_sec: minimum spacing between two accepted calls;
+        a value ``<= 0`` disables the limit.
+    :returns: ``0`` if the caller may proceed; else whole seconds to wait (for ``Retry-After``).
+    """
+    import math
+
+    if min_interval_sec <= 0:
+        return 0
+    async with _lock:
+        # Sample the clock only once the lock is held, so time spent waiting
+        # for the lock is not counted against the caller.
+        now = time.monotonic()
+        last = _last.get(user_id)
+        if last is not None:
+            remaining = min_interval_sec - (now - last)
+            if remaining > 0:
+                # A true ceiling: ``int(wait + 0.999)`` rounded 2.0005 s down
+                # to 2, inviting a retry that would be rejected again.
+                return max(1, math.ceil(remaining))
+        _last[user_id] = now
+        return 0
diff --git a/api/services/ebay_sold_scrape_service.py b/api/services/ebay_sold_scrape_service.py
new file mode 100644
index 0000000..ad02e0f
--- /dev/null
+++ b/api/services/ebay_sold_scrape_service.py
@@ -0,0 +1,577 @@
+"""
+Best-effort scrape of eBay **sold / completed** listing search (HTML).
+
+No Marketplace Insights API — uses the public search URL like a browser.
+**Fragile:** eBay often returns **403** for datacenter IPs.
+
+Uses ``curl_cffi`` with Chrome TLS/JA3 impersonation to defeat fingerprint-based
+blocks (eBay's edge fingerprints datacenter clients via JA3, not just IP). A
+warm-up GET on ``ebay.fr`` first collects session cookies, mirroring a real
+browser flow. An optional ``EBAY_SOLD_SCRAPE_PROXY`` is still honored as a
+last-resort fallback when impersonation alone is not enough.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+import time
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Awaitable, Callable
+from urllib.parse import urlencode
+
+from bs4 import BeautifulSoup
+from curl_cffi.requests import AsyncSession
+
+from config import AppSettings, get_settings
+
+logger = logging.getLogger(__name__)
+
+EBAY_FR_HOME = "https://www.ebay.fr/"
+EBAY_FR_SOLD_BASE = "https://www.ebay.fr/sch/i.html"
+
+#: Default impersonation profiles tried in order when no browser cookie source
+#: drives the choice (Chrome JA3 first, Safari JA3 fallback).
+_IMPERSONATE_PROFILES: tuple[str, ...] = ("chrome", "safari17_0")
+
+#: When we inject cookies harvested from a specific browser, we want the TLS /
+#: HTTP/2 fingerprint to *match* that browser — otherwise eBay's edge sees
+#: « cookies issued to Firefox JA3, presented over Chrome JA3 » and trips the
+#: anti-bot challenge. ``curl_cffi`` 0.7+ ships these named profiles.
+_PROFILES_BY_BROWSER: dict[str, tuple[str, ...]] = {
+ "firefox": ("firefox133", "firefox110", "chrome"),
+ "chrome": ("chrome", "safari17_0"),
+ "chromium": ("chrome", "safari17_0"),
+ "brave": ("chrome", "safari17_0"),
+ "edge": ("chrome", "safari17_0"),
+ "safari": ("safari17_0", "chrome"),
+}
+
+#: Extra headers on top of those auto-set by impersonation. We only force the
+#: language: Chrome defaults to ``en-US`` which is suspicious for ebay.fr.
+_EXTRA_HEADERS: dict[str, str] = {
+ "Accept-Language": "fr-FR,fr;q=0.9,en;q=0.8",
+}
+
+
+class EbayScrapeError(Exception):
+    """Signal a failed eBay HTML fetch: an HTTP status >= 400 or a transport error.
+
+    ``status_code`` carries the HTTP status, or ``None`` when the request
+    never produced a response.
+    """
+
+    def __init__(self, *, status_code: int | None, message: str) -> None:
+        super().__init__(message)
+        self.status_code = status_code
+
+
+@dataclass(frozen=True)
+class ScrapedSoldRow:
+    """One listing parsed from the sold-listings search results page."""
+
+    # Listing title, with the trailing screen-reader suffix removed.
+    title: str
+    # Parsed price, or None when the price text could not be parsed.
+    price_eur: float | None
+    # Absolute URL of the listing.
+    listing_url: str
+    # Absolute thumbnail URL, or None when no http(s) source was found.
+    image_url: str | None
+    # eBay listing identifier when it could be determined.
+    item_id: str | None
+    # Raw caption text of the row (None when empty).
+    sold_caption: str | None
+    # Approximate age of the sale in hours, derived from the caption; None if unparsable.
+    approx_hours_ago: float | None
+
+
+def ebay_fr_sold_search_url(*, q: str, page_size: int = 50, page: int = 1) -> str:
+    """Build the ebay.fr sold/completed search URL used by the scraper.
+
+    The same URL can be opened in a browser. ``page_size`` is clamped to the
+    10..60 range and ``_pgn`` is only added from the second page onwards.
+    """
+    clamped_size = max(10, min(page_size, 60))
+    query_pairs: list[tuple[str, str]] = [
+        ("_nkw", q.strip()),
+        ("LH_Sold", "1"),
+        ("LH_Complete", "1"),
+        ("_sop", "13"),
+        ("_ipg", str(clamped_size)),
+        ("rt", "nc"),
+    ]
+    if page > 1:
+        query_pairs.append(("_pgn", str(page)))
+    return EBAY_FR_SOLD_BASE + "?" + urlencode(query_pairs)
+
+
+def _extract_item_id(url: str) -> str | None:
+    """Return the 6 to 20 digit id following ``/itm/`` in ``url``, or ``None``."""
+    match = re.search(r"/itm/(\d{6,20})", url)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def _parse_eur_price(raw: str) -> float | None:
+    """
+    Parse a displayed euro price such as ``"1 234,56 EUR"`` into a float.
+
+    All whitespace is dropped first, including the no-break spaces used as
+    French thousands separators. When the text ends with a separator followed
+    by two digits, that separator is the decimal mark and any earlier ``.`` /
+    ``,`` are treated as thousands separators. Otherwise the first run of
+    digits is read as a whole amount. For a price range the last amount wins.
+
+    :returns: the amount rounded to 2 decimals, or ``None`` when nothing
+        parsable (or a non-positive amount) is found.
+    """
+    text = (raw or "").replace("EUR", " ").replace("€", " ")
+    # ``str.split()`` splits on any Unicode whitespace, so U+00A0 / U+202F go too.
+    compact = "".join(text.split())
+    if not compact:
+        return None
+
+    m = re.search(r"(\d+(?:[.,]\d{3})*)[.,](\d{2})$", compact)
+    if m:
+        whole = re.sub(r"[.,]", "", m.group(1))
+        num = f"{whole}.{m.group(2)}"
+    else:
+        m = re.search(r"\d{1,13}", compact)
+        if not m:
+            return None
+        num = m.group(0)
+
+    try:
+        v = float(num)
+    except ValueError:
+        return None
+    return round(v, 2) if v > 0 else None
+
+
+#: Relative French captions (« il y a 3 jours ») and their conversion to hours.
+_RELATIVE_FR = (
+    (re.compile(r"il y a\s+(\d+)\s*heure", re.I), lambda n: float(n)),
+    (re.compile(r"il y a\s+(\d+)\s*jour", re.I), lambda n: float(n) * 24.0),
+    (re.compile(r"il y a\s+(\d+)\s*minute", re.I), lambda n: float(n) / 60.0),
+)
+
+#: French month tokens normalized to ASCII (no accent, trailing dot stripped).
+_FR_MONTHS: dict[str, int] = {
+    "janv": 1, "janvier": 1, "jan": 1,
+    "fevr": 2, "fev": 2, "fevrier": 2,
+    "mars": 3, "mar": 3,
+    "avr": 4, "avril": 4,
+    "mai": 5,
+    "juin": 6,
+    "juil": 7, "juillet": 7,
+    "aout": 8,
+    "sept": 9, "sep": 9, "septembre": 9,
+    "oct": 10, "octobre": 10,
+    "nov": 11, "novembre": 11,
+    "dec": 12, "decembre": 12,
+}
+
+#: Matches a French short or long month name (with or without accent) in a caption.
+#: Long forms come first in the alternation: otherwise « janvier » would match
+#: as « janv », leave « ier » unconsumed and drop the year that follows.
+_FR_DATE_RX = re.compile(
+    r"(?P<day>\d{1,2})\s*"
+    r"(?P<month>janvier|janv\.?|jan\.?|f[eé]vrier|f[eé]vr?\.?|mars|mar\.?|"
+    r"avril|avr\.?|mai|juin|juillet|juil\.?|ao[uû]t|septembre|sept\.?|sep\.?|"
+    r"octobre|oct\.?|novembre|nov\.?|d[eé]cembre|d[eé]c\.?)"
+    r"\.?\s*(?P<year>\d{4})?",
+    re.IGNORECASE,
+)
+
+
+def _normalize_month_token(token: str) -> str:
+    """Lower-case ``token``, drop trailing dots and strip the French accents."""
+    t = token.lower().rstrip(".")
+    for a, b in (("é", "e"), ("è", "e"), ("ê", "e"), ("û", "u"), ("ù", "u")):
+        t = t.replace(a, b)
+    return t
+
+
+def _approx_hours_from_caption(caption: str, *, now: datetime | None = None) -> float | None:
+    """
+    Estimate how many hours ago a listing sold, from its French caption.
+
+    Relative captions (« il y a 3 jours ») are converted directly. Absolute
+    dates (« 12 janv. 2025 », « 3 mai ») are placed at 12:00 UTC on that day
+    and compared with ``now``.
+
+    :param caption: caption text of a result row.
+    :param now: reference instant (defaults to the current UTC time); a naive
+        value is interpreted as UTC.
+    :returns: hours elapsed, never negative, or ``None`` when no date or
+        duration could be read from the caption.
+    """
+    s = (caption or "").strip()
+    if not s:
+        return None
+    for rx, fn in _RELATIVE_FR:
+        m = rx.search(s)
+        if m:
+            try:
+                return fn(float(m.group(1)))
+            except (TypeError, ValueError):
+                continue
+
+    m = _FR_DATE_RX.search(s)
+    if not m:
+        return None
+    month = _FR_MONTHS.get(_normalize_month_token(m.group("month")))
+    if month is None:
+        return None
+    try:
+        day = int(m.group("day"))
+    except (TypeError, ValueError):
+        return None
+
+    current = now or datetime.now(timezone.utc)
+    if current.tzinfo is None:
+        current = current.replace(tzinfo=timezone.utc)
+    today = current.astimezone(timezone.utc).date()
+
+    explicit_year = m.group("year")
+    year = int(explicit_year) if explicit_year else current.year
+    try:
+        sold = datetime(year, month, day, 12, 0, tzinfo=timezone.utc)
+        # Roll back a year only when the year was omitted and the date lies
+        # on a later calendar day. Comparing full timestamps would push a
+        # sale made today, seen before 12:00 UTC, a whole year into the past.
+        if not explicit_year and sold.date() > today:
+            sold = sold.replace(year=year - 1)
+    except ValueError:
+        return None
+
+    delta_hours = (current - sold).total_seconds() / 3600.0
+    return max(delta_hours, 0.0)
+
+
+#: Screen-reader suffix that the ``s-card`` layout appends to listing titles.
+_SR_ONLY_TITLE_RX = re.compile(r"\s*La page s['’]ouvre.*$", re.I)
+
+
+def _clean_title(text: str) -> str:
+    """Strip the screen-reader suffix and surrounding whitespace from a title."""
+    raw = text or ""
+    without_suffix = _SR_ONLY_TITLE_RX.sub("", raw)
+    return without_suffix.strip()
+
+
+def _parse_sold_rows(html: str) -> list[ScrapedSoldRow]:
+    """
+    Parse a sold-listings results page into ``ScrapedSoldRow`` objects.
+
+    Rows without a usable title or listing URL are skipped. The time window
+    is not applied here; each row only carries the age derived from its caption.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    # New layout (2025+): result rows are ``li.s-card`` elements.
+    lis = soup.select("li.s-card")
+    if not lis:
+        lis = soup.select("li.s-item")  # legacy fallback
+
+    rows: list[ScrapedSoldRow] = []
+    for li in lis:
+        # Skip rows carrying the ``s-item--watch-at-corner`` class.
+        li_classes = " ".join(li.get("class") or [])
+        if "s-item--watch-at-corner" in li_classes:
+            continue
+
+        title_el = (
+            li.select_one(".s-card__title")
+            or li.select_one(".s-item__title span")
+            or li.select_one(".s-item__title")
+        )
+        title = _clean_title(title_el.get_text(" ", strip=True) if title_el else "")
+        # Drop empty titles, « Montrez-vous … » rows and sponsored rows.
+        if not title or title.lower().startswith("montrez-vous") or "sponsoris" in title.lower():
+            continue
+
+        # Prefer ``data-listingid`` (canonical, present on every s-card LI). The
+        # visible ``a.su-link`` in the new layout points to a *search results*
+        # URL, not to the listing — so reconstruct the listing URL ourselves.
+        item_id = (li.get("data-listingid") or "").strip() or None
+        listing_url = ""
+        if item_id and item_id.isdigit():
+            listing_url = f"https://www.ebay.fr/itm/{item_id}"
+        else:
+            # Fall back to an anchor; only absolute URLs are kept, query string dropped.
+            link_el = li.select_one("a[href*='/itm/']") or li.select_one("a.s-item__link")
+            href = str(link_el.get("href") or "").strip() if link_el else ""
+            if href.startswith("http"):
+                listing_url = href.split("?")[0]
+                item_id = item_id or _extract_item_id(href)
+        if not listing_url:
+            continue
+
+        price_el = li.select_one(".s-card__price") or li.select_one(".s-item__price")
+        price_txt = price_el.get_text(" ", strip=True) if price_el else ""
+        price = _parse_eur_price(price_txt)
+
+        img_el = (
+            li.select_one(".s-card__image img")
+            or li.select_one(".image-treatment img")
+            or li.select_one("img.s-item__image-img")
+            or li.select_one(".s-item__image-img")
+        )
+        img_src = ""
+        if img_el is not None:
+            # ``data-src`` is used when ``src`` is empty.
+            img_src = str(img_el.get("src") or "").strip() or str(img_el.get("data-src") or "").strip()
+        img_url = img_src if img_src.startswith("http") else None
+
+        cap_el = (
+            li.select_one(".s-card__caption")
+            or li.select_one(".s-item__subtitle")
+            or li.select_one(".s-item__caption--signal")
+        )
+        cap_txt = cap_el.get_text(" ", strip=True) if cap_el else ""
+        approx = _approx_hours_from_caption(cap_txt)
+
+        rows.append(
+            ScrapedSoldRow(
+                title=title,
+                price_eur=price,
+                listing_url=listing_url,
+                image_url=img_url,
+                item_id=item_id,
+                sold_caption=cap_txt or None,
+                approx_hours_ago=approx,
+            ),
+        )
+    return rows
+
+
+#: Lower-cased phrases found on eBay's anti-bot interstitial (« Nous sommes désolés /
+#: Vérification de votre navigateur avant d'accéder à eBay »), which is also far
+#: smaller than a real search results page.
+_BOT_CHALLENGE_TOKENS: tuple[str, ...] = (
+    "vérification de votre navigateur",
+    "verification de votre navigateur",
+    "nous sommes désolés",
+    "nous sommes desoles",
+    "access denied",
+    "security measure",
+    "pardon our interruption",
+)
+
+
+def _looks_like_bot_challenge(html: str) -> bool:
+    """Return True when ``html`` is under 200,000 characters and a known
+    interstitial phrase occurs in its first 8,000 characters (case-insensitive)."""
+    if len(html) >= 200_000:
+        return False
+    head = html[:8000].lower()
+    for phrase in _BOT_CHALLENGE_TOKENS:
+        if phrase in head:
+            return True
+    return False
+
+
+def _filter_window(rows: list[ScrapedSoldRow], *, window_hours: float) -> list[ScrapedSoldRow]:
+    """Return the rows whose sale age is known and at most ``window_hours`` plus
+    half an hour of tolerance; the input order is preserved."""
+    cutoff = window_hours + 0.5
+    return [
+        row
+        for row in rows
+        if row.approx_hours_ago is not None and row.approx_hours_ago <= cutoff
+    ]
+
+
+#: Cached eBay cookies from the local user's browser. Refreshed every 5 min so
+#: that re-authentication / challenge resolution in the browser is picked up
+#: without restarting the API. Empty dict on platforms / setups where no
+#: browser cookie store is reachable (e.g. headless VPS).
+_BROWSER_COOKIE_CACHE: dict[str, Any] = {"loaded_at": 0.0, "value": {}}
+_BROWSER_COOKIE_TTL_SECONDS = 300.0
+#: Shorter TTL when no cookies were found, so a freshly-loaded browser is picked
+#: up quickly instead of being masked by a 5-minute miss-cache.
+_BROWSER_COOKIE_TTL_EMPTY = 30.0
+
+#: Domains whose cookies may be replayed to eBay (the domain itself or a subdomain).
+_EBAY_COOKIE_DOMAINS: tuple[str, ...] = ("ebay.fr", "ebay.com")
+
+
+def _is_ebay_cookie_domain(domain: str | None) -> bool:
+    """Return True when ``domain`` is ``ebay.fr`` / ``ebay.com`` or one of their subdomains.
+
+    A plain ``endswith("ebay.fr")`` would also accept look-alike hosts such as
+    ``notebay.fr``, whose cookies must never be sent to eBay.
+    """
+    host = (domain or "").lstrip(".").lower()
+    return any(host == root or host.endswith("." + root) for root in _EBAY_COOKIE_DOMAINS)
+
+
+def _read_browser_cookies() -> tuple[dict[str, str], str | None]:
+    """
+    Best-effort: read ebay.fr / ebay.com cookies from the local user's browser
+    so that a human-passed challenge cookie can flow into our automated request.
+
+    Returns ``({}, None)`` when no browser cookie store is accessible (typical
+    on a headless server). The second tuple element identifies the source
+    browser (e.g. ``"firefox"``) so the caller can pick a matching TLS profile.
+    Cached for 5 min on success / 30 s on miss. The first browser (in probe
+    order) that yields at least one eBay cookie wins; later browsers are still
+    probed, but only so the log can report what each one returned.
+    """
+    now = time.time()
+    cached_value: dict[str, str] = dict(_BROWSER_COOKIE_CACHE["value"])
+    cached_source: str | None = _BROWSER_COOKIE_CACHE.get("source")  # type: ignore[assignment]
+    age = now - float(_BROWSER_COOKIE_CACHE["loaded_at"])
+    ttl = _BROWSER_COOKIE_TTL_SECONDS if cached_value else _BROWSER_COOKIE_TTL_EMPTY
+    if age < ttl:
+        return cached_value, cached_source
+
+    cookies: dict[str, str] = {}
+    source: str | None = None
+    try:
+        import browser_cookie3  # type: ignore[import-untyped]
+    except ImportError:
+        logger.warning("browser_cookie3 not installed — install via `pip install browser-cookie3`")
+        # Cache the miss too, so the import is not retried on every request.
+        _BROWSER_COOKIE_CACHE["loaded_at"] = now
+        _BROWSER_COOKIE_CACHE["value"] = cookies
+        _BROWSER_COOKIE_CACHE["source"] = source
+        return cookies, source
+
+    attempts: list[str] = []
+    # Probe order favors browsers we have a matching impersonation profile for.
+    for name in ("chrome", "safari", "firefox", "edge", "brave", "chromium"):
+        loader = getattr(browser_cookie3, name, None)
+        if not callable(loader):
+            continue
+        try:
+            jar = loader(domain_name="ebay")
+        except Exception as exc:
+            attempts.append(f"{name}=err({type(exc).__name__})")
+            continue
+        bucket: dict[str, str] = {}
+        for c in jar:
+            # ``domain_name="ebay"`` is only a substring filter; check the host properly.
+            if _is_ebay_cookie_domain(c.domain):
+                bucket[c.name] = c.value
+        attempts.append(f"{name}={len(bucket)}")
+        if bucket and not cookies:
+            cookies = bucket
+            source = name
+            logger.info("Loaded %d eBay cookie(s) from browser=%s", len(cookies), name)
+
+    if not cookies:
+        logger.warning("No eBay cookies found in any browser. Probes: %s", ", ".join(attempts) or "none")
+
+    _BROWSER_COOKIE_CACHE["loaded_at"] = now
+    _BROWSER_COOKIE_CACHE["value"] = cookies
+    _BROWSER_COOKIE_CACHE["source"] = source
+    return cookies, source
+
+
+async def _fetch_with_profile(
+    *,
+    url: str,
+    profile: str,
+    proxies: dict[str, str] | None,
+    cookies: dict[str, str],
+) -> str:
+    """
+    One attempt: warm-up GET on the eBay home, then the search URL.
+
+    Both requests go through the same ``AsyncSession`` created with
+    ``impersonate=profile`` and a 45 s timeout.
+
+    :param url: search URL to download.
+    :param profile: ``curl_cffi`` impersonation profile name.
+    :param proxies: optional proxy mapping handed to the session.
+    :param cookies: cookies sent with both requests (omitted when empty).
+    :returns: the response body of the search request.
+    :raises EbayScrapeError: with ``status_code=None`` when the search request
+        raises, or with the HTTP status when it is ``>= 400``.
+    """
+    async with AsyncSession(impersonate=profile, timeout=45, proxies=proxies) as session:
+        # Warm-up is best-effort: a failure is only logged at debug level and
+        # the search request is attempted anyway.
+        try:
+            await session.get(
+                EBAY_FR_HOME,
+                headers=_EXTRA_HEADERS,
+                cookies=cookies or None,
+                allow_redirects=True,
+            )
+        except Exception as exc:
+            logger.debug("eBay warm-up GET failed (%s): %s", profile, exc)
+
+        try:
+            resp = await session.get(
+                url,
+                headers=_EXTRA_HEADERS,
+                cookies=cookies or None,
+                allow_redirects=True,
+            )
+        except Exception as exc:
+            # Any failure of the search request is reported with no status code.
+            raise EbayScrapeError(status_code=None, message=str(exc)) from exc
+
+        if resp.status_code >= 400:
+            logger.warning(
+                "eBay sold HTML fetch failed (%s) status=%s len=%s",
+                profile, resp.status_code, len(resp.text),
+            )
+            raise EbayScrapeError(status_code=resp.status_code, message=f"HTTP {resp.status_code}")
+        return resp.text
+
+
+async def fetch_sold_listings_html(
+    *,
+    q: str,
+    page_size: int = 50,
+    page: int = 1,
+    app: AppSettings | None = None,
+) -> str:
+    """
+    Fetch the sold-listings HTML, trying each impersonation profile in turn.
+
+    The profile list is chosen to match the browser the cookies came from
+    (default list when no cookies were found). Each profile after the first
+    is tried after a 1.5 s pause, until one returns a page that is not a
+    bot-challenge interstitial. If every page obtained was a challenge, the
+    last one is returned; the caller detects the challenge to surface a
+    clean error to the user.
+
+    :param q: search keywords.
+    :param page_size: results per page requested from eBay.
+    :param page: 1-based results page.
+    :param app: settings to use; ``get_settings()`` when omitted.
+    :raises EbayScrapeError: when no profile produced any HTML (the last
+        fetch error, or a generic error if no profile could be used at all).
+    """
+    s = app or get_settings()
+    url = ebay_fr_sold_search_url(q=q, page_size=page_size, page=page)
+    proxy = (s.ebay_sold_scrape_proxy or "").strip() or None
+    proxies = {"http": proxy, "https": proxy} if proxy else None
+
+    cookies, cookie_source = _read_browser_cookies()
+    profiles = _PROFILES_BY_BROWSER.get(cookie_source or "", _IMPERSONATE_PROFILES)
+    if cookie_source:
+        logger.info("Using impersonation profiles=%s to match cookie source=%s", profiles, cookie_source)
+
+    last_html = ""
+    last_err: EbayScrapeError | None = None
+    for idx, profile in enumerate(profiles):
+        if idx > 0:
+            await asyncio.sleep(1.5)  # brief pause before profile rotation
+        try:
+            html = await _fetch_with_profile(
+                url=url, profile=profile, proxies=proxies, cookies=cookies,
+            )
+        except EbayScrapeError as exc:
+            # Remember the failure; it is re-raised only if no profile yields HTML.
+            last_err = exc
+            continue
+        except Exception as exc:
+            # curl_cffi raises if an impersonation profile string is unknown to
+            # the bundled libcurl-impersonate (e.g. ``firefox133`` on an older
+            # build). Skip to the next profile rather than 500-ing.
+            logger.warning("Impersonation profile %s unavailable: %s", profile, exc)
+            continue
+        last_html = html
+        if not _looks_like_bot_challenge(html):
+            return html
+        logger.info("eBay challenge with profile=%s — trying next", profile)
+
+    if last_html:
+        return last_html  # caller will detect the challenge
+    if last_err:
+        raise last_err
+    raise EbayScrapeError(status_code=None, message="no impersonation profile succeeded")
+
+
+async def scrape_sold_listings(
+    *,
+    q: str,
+    window_hours: float,
+    limit: int = 50,
+    pages: int = 1,
+    app: AppSettings | None = None,
+    on_page_done: Callable[[int, int], Awaitable[None]] | None = None,
+) -> tuple[list[dict[str, Any]], str | None]:
+    """
+    Scrape sold listings and return ``(items_as_dicts, error_message_or_none)``.
+
+    Up to ``pages`` result pages (capped at 20) are fetched with a short pause
+    in between and merged, dropping rows whose ``item_id`` was already seen.
+    A fetch error or a bot-challenge page stops the pagination: the rows
+    gathered so far are still returned, together with a user-facing message.
+    Pagination also stops when a page after the first adds nothing new.
+
+    ``on_page_done`` is awaited after every parsed page with
+    ``(page_num, unique_rows_so_far)``; exceptions it raises are logged and
+    ignored.
+
+    Only rows inside ``window_hours`` are kept, truncated to ``limit``. On
+    success the error message is ``None``.
+    """
+    page_count = max(1, min(int(pages), 20))
+    per_page = min(60, max(limit, 10))
+
+    collected: list[Any] = []
+    known_ids: set[str] = set()
+    error_text: str | None = None
+
+    for page_no in range(1, page_count + 1):
+        if page_no > 1:
+            await asyncio.sleep(0.8)
+
+        try:
+            html = await fetch_sold_listings_html(
+                q=q,
+                page_size=per_page,
+                page=page_no,
+                app=app,
+            )
+        except EbayScrapeError as exc:
+            if exc.status_code is not None:
+                error_text = (
+                    f"eBay a refusé la page HTML (HTTP {exc.status_code}). "
+                    f"Réessayez plus tard ou ouvrez la recherche « vendus » dans le navigateur."
+                )
+            else:
+                logger.warning("eBay sold scrape transport error (page %d): %s", page_no, exc)
+                error_text = f"Erreur réseau lors du téléchargement de la page eBay : {exc}"
+            break
+
+        if _looks_like_bot_challenge(html):
+            logger.warning("eBay served bot-challenge page %d (len=%d)", page_no, len(html))
+            error_text = (
+                "eBay a affiché une page de vérification anti-bot. "
+                "Ouvrez ebay.fr dans Safari ou Chrome sur cette machine, laissez la page se charger "
+                "(le challenge se résout tout seul), puis réessayez : l'API réutilisera vos cookies "
+                "de navigateur. À défaut, attendez 30–60 min et utilisez le lien manuel ci-dessous."
+            )
+            break
+
+        count_before = len(collected)
+        for row in _parse_sold_rows(html):
+            listing_id = row.item_id
+            if listing_id:
+                if listing_id in known_ids:
+                    continue
+                known_ids.add(listing_id)
+            collected.append(row)
+        added = len(collected) - count_before
+
+        if on_page_done is not None:
+            try:
+                await on_page_done(page_no, len(collected))
+            except Exception:
+                logger.exception("on_page_done callback raised — ignoring")
+
+        # Past the last available page eBay returns nothing new, so a page
+        # after the first that adds no row ends the pagination.
+        if page_no > 1 and added == 0:
+            break
+
+    in_window = _filter_window(collected, window_hours=window_hours)
+    selected = in_window[:limit]
+    if not selected:
+        logger.info(
+            "eBay sold scrape empty: q=%r window=%sh pages=%d parsed=%d in_window=%d",
+            q, window_hours, page_count, len(collected), len(in_window),
+        )
+
+    items: list[dict[str, Any]] = [
+        {
+            "title": row.title,
+            "price_eur": row.price_eur,
+            "listing_url": row.listing_url,
+            "image_url": row.image_url,
+            "item_id": row.item_id,
+            "sold_caption": row.sold_caption,
+            "approx_hours_ago": row.approx_hours_ago,
+        }
+        for row in selected
+    ]
+    return items, error_text
diff --git a/api/services/ebay_sold_top_service.py b/api/services/ebay_sold_top_service.py
new file mode 100644
index 0000000..a5896ee
--- /dev/null
+++ b/api/services/ebay_sold_top_service.py
@@ -0,0 +1,358 @@
+"""
+Aggregate scraped sold-listings into a "top sold cards" ranking.
+
+Strategy: build a coarse fingerprint per listing title (set/card-number when
+parseable, otherwise the first few significant tokens), keep the grade
+(PSA / BGS / CGC) on a separate axis so a graded card never collapses with
+its raw counterpart, then group + count + price stats.
+
+The fingerprint is intentionally lossy: « Pikachu VMAX 044/185 Vivid Voltage »
+and « Pikachu VMAX Vivid Voltage 44/185 PSA10 » will land in the same group
+when ungraded — the second goes to a separate ``PSA 10`` bucket.
+"""
+
+from __future__ import annotations
+
+import re
+import unicodedata
+from statistics import median
+from typing import Any, Literal
+
+
+Category = Literal["cards", "graded", "sealed"]
+
+_CARD_NUMBER_RX = re.compile(r"\b(\d{1,3})\s*/\s*(\d{1,3})\b")
+
+#: Recognised grading companies. PCA (Pokémon Card Authentication, FR) and
+#: CCC / CGG appear regularly on ebay.fr listings even though they are smaller
+#: than PSA / BGS / CGC. Order in the alternation does not matter — the regex
+#: is anchored on word boundaries.
+_GRADERS: tuple[str, ...] = (
+    "psa",
+    "bgs",
+    "cgc",
+    "cgg",
+    "ccc",
+    "pca",
+    "beckett",
+    "ace",
+    "sgc",
+    "hga",
+    "tag",
+    "get",
+    "mnt",
+    "gma",
+)
+
+#: Matches a grader name followed by a grade (``psa 10``, ``ccc 9,5``,
+#: ``bgs9.5``). The named groups ``co`` (company) and ``grade`` are the ones
+#: read back by ``_extract_grade``.
+_GRADE_RX = re.compile(
+    r"\b(?P<co>" + "|".join(_GRADERS) + r")\s*"
+    # Longer alternatives first so « 9.5 » beats « 9 ». Comma is the French
+    # decimal separator (« CCC 9,5 ») and is normalised to a dot below.
+    r"(?P<grade>10|9[.,]5|9|8[.,]5|8|7[.,]5|7|6[.,]5|6|5[.,]5|5)\b",
+    re.IGNORECASE,
+)
+
+#: Strong sealed-product signals (matched on the diacritic-stripped lowercased
+#: title). Order does not matter — first hit classifies the listing.
+_SEALED_STRONG_SIGNALS: tuple[str, ...] = (
+ "etb",
+ "elite trainer",
+ "trainer box",
+ "demi display",
+ "display",
+ "booster box",
+ "boite booster",
+ "boite de booster",
+ "box booster",
+ "mini tin",
+ "tin pokemon",
+ "pokemon tin",
+ "blister",
+ "tripack",
+ "tri pack",
+ "triple pack",
+ "coffret",
+ "premium collection",
+ "collection box",
+ "sleeved booster",
+ "booster bundle",
+ "build battle",
+ "ultra premium",
+ "pokebox",
+)
+
+#: Weak sealed hints often used for single cards in blister/sleeve.
+#: We only use these when no card-level hint is detected.
+_SEALED_WEAK_SIGNALS: tuple[str, ...] = (
+ "scelle",
+ "sealed",
+)
+
+#: Single-card set/promo codes commonly seen in French listings
+#: (e.g. "SWSH291", "SVP 052", "MEP031", "TG07"). A prefix may be separated
+#: from its 1-3 digit number by optional whitespace and/or a single hyphen.
+_CARD_CODE_RX = re.compile(
+    r"\b(?:svp|swsh|tg|gg|mep|sm|xy|bw|dp|hgss|sw)\s*-?\s*\d{1,3}\b",
+    re.IGNORECASE,
+)
+
+_STOPWORDS: frozenset[str] = frozenset(
+ {
+ "pokemon",
+ "pokémon",
+ "carte",
+ "cartes",
+ "card",
+ "cards",
+ "tcg",
+ "ccg",
+ "the",
+ "le",
+ "la",
+ "les",
+ "un",
+ "une",
+ "des",
+ "de",
+ "du",
+ "et",
+ "and",
+ "or",
+ "fr",
+ "eng",
+ "en",
+ "ja",
+ "jp",
+ "jap",
+ "japonais",
+ "japanese",
+ "japonaise",
+ "anglais",
+ "anglaise",
+ "english",
+ "francais",
+ "francaise",
+ "french",
+ "near",
+ "mint",
+ "nm",
+ "neuf",
+ "neuve",
+ "occasion",
+ "rare",
+ "common",
+ "uncommon",
+ "holographic",
+ "officiel",
+ "officielle",
+ "original",
+ "originale",
+ "scellee",
+ "scellees",
+ "scelle",
+ "scelles",
+ "sealed",
+ "boite",
+ "promo",
+ "lot",
+ }
+)
+
+_NON_WORD = re.compile(r"[^a-z0-9]+")
+
+
+def _strip_diacritics(text: str) -> str:
+    """Return ``text`` NFKD-decomposed with every combining mark removed."""
+    decomposed = unicodedata.normalize("NFKD", text)
+    kept = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
+    return "".join(kept)
+
+
+def _extract_grade(title_norm: str) -> str | None:
+    """
+    Find the first grader + grade mention in ``title_norm``.
+
+    Returns a label such as ``PSA 10`` or ``PCA 9.5`` (company upper-cased,
+    decimal comma turned into a dot), or ``None`` when ``_GRADE_RX`` finds
+    nothing.
+    """
+    match = _GRADE_RX.search(title_norm)
+    if match is None:
+        return None
+    company = match.group('co').upper()
+    value = match.group("grade").replace(",", ".")
+    return f"{company} {value}"
+
+
+def _significant_tokens(title_norm: str) -> list[str]:
+    """
+    Split a normalized title into tokens worth keeping.
+
+    Runs of characters outside ``[a-z0-9]`` act as separators; tokens shorter
+    than two characters and tokens listed in ``_STOPWORDS`` are discarded.
+    Order of the remaining tokens is preserved.
+    """
+    words = _NON_WORD.sub(" ", title_norm).split()
+    return [word for word in words if len(word) >= 2 and word not in _STOPWORDS]
+
+
+def _classify(
+    title_norm: str,
+    *,
+    has_grade: bool,
+    has_card_number: bool,
+    has_card_code: bool,
+) -> Category:
+    """
+    Sort a listing into ``"graded"``, ``"sealed"`` or ``"cards"``.
+
+    Rules are evaluated top to bottom; the first one that applies wins:
+
+    1. A detected grade always yields ``"graded"``, even when the title also
+       carries a sealed-product keyword.
+    2. A card number (``12/102``) or a card code (``SWSH291``, ``TG07``, …)
+       yields ``"cards"`` — « scellé(e) » on such a listing describes a card
+       sealed in plastic, not an unopened product.
+    3. Any strong sealed keyword contained in the title (ETB, display,
+       coffret, …) yields ``"sealed"``.
+    4. A weak hint ("scelle" / "sealed") yields ``"sealed"`` unless the title
+       also contains "promo" or a card code, in which case it stays
+       ``"cards"``.
+    5. Everything else is a raw card.
+
+    Keywords are matched as plain substrings of ``title_norm``.
+    """
+    if has_grade:
+        return "graded"
+    if has_card_number or has_card_code:
+        return "cards"
+    if any(signal in title_norm for signal in _SEALED_STRONG_SIGNALS):
+        return "sealed"
+    if not any(hint in title_norm for hint in _SEALED_WEAK_SIGNALS):
+        return "cards"
+    # Weak hint present: only a single-card marker keeps it out of "sealed".
+    if "promo" in title_norm or _CARD_CODE_RX.search(title_norm) is not None:
+        return "cards"
+    return "sealed"
+
+
+def _build_fingerprint(title: str) -> tuple[str, str | None, Category]:
+    """
+    Derive ``(group_key, grade_label, category)`` from a raw listing title.
+
+    The title is diacritic-stripped and lower-cased; a detected grade is
+    removed from the text so it never leaks into ``group_key`` (the caller
+    groups on the key and the grade separately).
+
+    ``group_key`` takes one of three shapes:
+
+    * ``<first significant token>-<NNN>-<MMM>`` when a ``N/M`` card number is
+      present (both sides zero-padded to three digits, ``card`` as the anchor
+      when no significant token survives);
+    * the first three significant tokens joined by ``-`` otherwise;
+    * a dash-normalised prefix of the title (40 chars max), or ``unknown``,
+      when there are no significant tokens at all.
+    """
+    normalized = _strip_diacritics(title).lower()
+    grade = _extract_grade(normalized)
+    if grade is not None:
+        normalized = _GRADE_RX.sub(" ", normalized)
+
+    number = _CARD_NUMBER_RX.search(normalized)
+    category = _classify(
+        normalized,
+        has_grade=grade is not None,
+        has_card_number=number is not None,
+        has_card_code=_CARD_CODE_RX.search(normalized) is not None,
+    )
+    words = _significant_tokens(normalized)
+
+    if number is not None:
+        left, right = (part.zfill(3) for part in number.groups())
+        lead = words[0] if words else "card"
+        slug = f"{lead}-{left}-{right}"
+    elif words:
+        slug = "-".join(words[:3])
+    else:
+        slug = _NON_WORD.sub("-", normalized)[:40] or "unknown"
+    return slug, grade, category
+
+
+def _pick_display_title(titles: list[str]) -> str:
+    """Return the longest title (first one on ties), or ``""`` for an empty list."""
+    if not titles:
+        return ""
+    return max(titles, key=len)
+
+
+def aggregate_top_sold(
+    rows: list[dict[str, Any]],
+    *,
+    min_count: int = 1,
+    limit_per_category: int = 20,
+) -> dict[str, list[dict[str, Any]]]:
+    """
+    Bucket scraped sold rows by ``(category, fingerprint, grade)`` and build
+    one ranking per category.
+
+    Rows without a usable title are ignored. Only numeric prices strictly
+    above zero feed the price statistics; every titled row still counts as a
+    sale. The first non-empty ``image_url`` / ``listing_url`` seen for a group
+    is kept as its illustration / sample link.
+
+    Returns ``{"cards": [...], "graded": [...], "sealed": [...]}``. Each list
+    is ordered by sale count (desc), cumulated value (desc), then display
+    title, truncated to ``limit_per_category`` and stamped with a 1-based
+    ``rank``. Groups with fewer than ``min_count`` sales are dropped.
+
+    Per-group fields: ``category``, ``fingerprint``, ``grade``,
+    ``display_title``, ``image_url``, ``sample_listing_url``, ``count``,
+    ``total_value_eur``, ``median_price_eur``, ``min_price_eur``,
+    ``max_price_eur``, ``approx_hours_min``, ``rank``.
+    """
+    groups: dict[tuple[Category, str, str | None], dict[str, Any]] = {}
+    for row in rows:
+        title = (row.get("title") or "").strip()
+        if not title:
+            continue
+        slug, grade, category = _build_fingerprint(title)
+        group = groups.setdefault(
+            (category, slug, grade),
+            {
+                "category": category,
+                "fingerprint": slug,
+                "grade": grade,
+                "titles": [],
+                "prices": [],
+                "image_url": None,
+                "sample_listing_url": None,
+                "approx_hours_values": [],
+            },
+        )
+        group["titles"].append(title)
+
+        price = row.get("price_eur")
+        if isinstance(price, (int, float)) and price > 0:
+            group["prices"].append(float(price))
+
+        # Keep the first non-empty picture / link encountered for the group.
+        for target, source in (
+            ("image_url", "image_url"),
+            ("sample_listing_url", "listing_url"),
+        ):
+            if not group[target] and row.get(source):
+                group[target] = row[source]
+
+        hours_ago = row.get("approx_hours_ago")
+        if isinstance(hours_ago, (int, float)):
+            group["approx_hours_values"].append(float(hours_ago))
+
+    ranked: dict[str, list[dict[str, Any]]] = {"cards": [], "graded": [], "sealed": []}
+    for group in groups.values():
+        sales = len(group["titles"])
+        if sales < min_count:
+            continue
+        prices: list[float] = group["prices"]
+        hours_seen: list[float] = group["approx_hours_values"]
+        has_prices = bool(prices)
+        entry = {
+            "category": group["category"],
+            "fingerprint": group["fingerprint"],
+            "grade": group["grade"],
+            "display_title": _pick_display_title(group["titles"]),
+            "image_url": group["image_url"],
+            "sample_listing_url": group["sample_listing_url"],
+            "count": sales,
+            "total_value_eur": round(sum(prices), 2) if has_prices else 0.0,
+            "median_price_eur": round(median(prices), 2) if has_prices else None,
+            "min_price_eur": round(min(prices), 2) if has_prices else None,
+            "max_price_eur": round(max(prices), 2) if has_prices else None,
+            "approx_hours_min": round(min(hours_seen), 1) if hours_seen else None,
+        }
+        ranked[group["category"]].append(entry)
+
+    def _order(entry: dict[str, Any]) -> tuple[int, float, str]:
+        # Most sales first, then highest cumulated value, then title A→Z.
+        return (
+            -int(entry["count"]),
+            -float(entry["total_value_eur"] or 0),
+            entry["display_title"].lower(),
+        )
+
+    for category in list(ranked):
+        top = sorted(ranked[category], key=_order)[:limit_per_category]
+        for position, entry in enumerate(top, start=1):
+            entry["rank"] = position
+        ranked[category] = top
+    return ranked
diff --git a/api/services/ebay_sold_top_worker.py b/api/services/ebay_sold_top_worker.py
new file mode 100644
index 0000000..a14b692
--- /dev/null
+++ b/api/services/ebay_sold_top_worker.py
@@ -0,0 +1,301 @@
+"""
+Background worker for the eBay « top sold » scrape.
+
+The synchronous scrape can take 5-15 s (2-3 paginated GETs against eBay.fr,
+plus a polite pause between pages). This module fronts that work with an
+in-memory job queue: callers submit a job, get a ``job_id`` back, then poll
+``GET /ebay/market/sold-top/{job_id}`` until ``status == "completed"``.
+
+A short TTL cache keyed on ``(q, window_hours, pages, top_limit, min_count)``
+short-circuits identical submissions while the data is fresh, drastically
+reducing the load eBay sees from the VPS IP when several users search for
+the same popular keywords (« carte pokemon », « charizard », …).
+
+State is process-local (a plain ``dict``). For a single Uvicorn / Gunicorn
+worker this is fine; if we ever scale to N workers we'll need to either pin
+job ids to a worker (sticky session) or move state to Redis.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import secrets
+import time
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+from config import AppSettings
+from services.ebay_sold_scrape_service import scrape_sold_listings
+from services.ebay_sold_top_service import aggregate_top_sold
+
+logger = logging.getLogger(__name__)
+
+
+JobStatus = Literal["pending", "running", "completed", "failed"]
+
+
+@dataclass
+class EbaySoldTopJob:
+    """State of one « top sold » scrape: request parameters, progress, outcome."""
+
+    # --- request parameters -------------------------------------------------
+    job_id: str
+    user_id: int  # kept on the job but not part of ``to_public()``
+    q: str
+    window_hours: float
+    pages: int
+    scrape_limit: int
+    top_limit: int
+    min_count: int
+    # --- progress / outcome -------------------------------------------------
+    status: JobStatus = "pending"
+    pages_done: int = 0
+    total_observed: int = 0
+    result: dict[str, Any] | None = None
+    error: str | None = None
+    # --- timestamps (``time.time()`` epoch seconds) -------------------------
+    created_at: float = field(default_factory=time.time)
+    updated_at: float = field(default_factory=time.time)
+    started_at: float | None = None
+    completed_at: float | None = None
+
+    def to_public(self) -> dict[str, Any]:
+        """
+        Build the payload exposed to API clients.
+
+        ``q`` is published as ``query`` and ``pages`` as ``pages_requested``;
+        ``user_id``, ``scrape_limit``, ``top_limit`` and ``min_count`` are
+        left out.
+        """
+        return dict(
+            job_id=self.job_id,
+            status=self.status,
+            query=self.q,
+            window_hours=self.window_hours,
+            pages_requested=self.pages,
+            pages_done=self.pages_done,
+            total_observed=self.total_observed,
+            result=self.result,
+            error=self.error,
+            created_at=self.created_at,
+            updated_at=self.updated_at,
+            started_at=self.started_at,
+            completed_at=self.completed_at,
+        )
+
+
+#: Job TTL after last update — long enough for a slow client to finish polling.
+_JOB_TTL_SECONDS = 600.0
+#: Cache TTL on the result body. Tuned to absorb traffic spikes on popular
+#: queries (« carte pokemon », « charizard ») without hammering eBay.
+_RESULT_CACHE_TTL_SECONDS = 15 * 60.0
+
+_JOBS: dict[str, EbaySoldTopJob] = {}
+_RESULT_CACHE: dict[tuple[str, float, int, int, int], tuple[float, dict[str, Any]]] = {}
+
+
+def _gc_jobs(now: float | None = None) -> None:
+    """Evict every job not updated within ``_JOB_TTL_SECONDS`` of ``now`` (default: current time)."""
+    reference = time.time() if now is None else now
+    # Iterate over a snapshot so entries can be removed while scanning.
+    for job_id, job in list(_JOBS.items()):
+        if reference - job.updated_at > _JOB_TTL_SECONDS:
+            del _JOBS[job_id]
+
+
+def _gc_cache(now: float | None = None) -> None:
+    """Evict cached results stored more than ``_RESULT_CACHE_TTL_SECONDS`` before ``now`` (default: current time)."""
+    reference = time.time() if now is None else now
+    # Iterate over a snapshot so entries can be removed while scanning.
+    for key, (stored_at, _body) in list(_RESULT_CACHE.items()):
+        if reference - stored_at > _RESULT_CACHE_TTL_SECONDS:
+            del _RESULT_CACHE[key]
+
+
+def _cache_key(
+    *, q: str, window_hours: float, pages: int, top_limit: int, min_count: int,
+) -> tuple[str, float, int, int, int]:
+    """Build the result-cache key: trimmed lower-case query plus the coerced numeric parameters."""
+    normalized_query = q.strip().lower()
+    return (
+        normalized_query,
+        float(window_hours),
+        int(pages),
+        int(top_limit),
+        int(min_count),
+    )
+
+
+def _peek_cached_result(
+    *, q: str, window_hours: float, pages: int, top_limit: int, min_count: int,
+) -> dict[str, Any] | None:
+    """
+    Look up the cached top-sold body for these parameters.
+
+    Expired entries are purged first. Returns the stored body (the same dict
+    object, not a copy) while it is younger than
+    ``_RESULT_CACHE_TTL_SECONDS``, otherwise ``None``.
+    """
+    _gc_cache()
+    key = _cache_key(
+        q=q, window_hours=window_hours, pages=pages,
+        top_limit=top_limit, min_count=min_count,
+    )
+    try:
+        stored_at, body = _RESULT_CACHE[key]
+    except KeyError:
+        return None
+    if time.time() - stored_at <= _RESULT_CACHE_TTL_SECONDS:
+        return body
+    # Went stale between the purge above and this check.
+    _RESULT_CACHE.pop(key, None)
+    return None
+
+
+def _store_cached_result(
+    *,
+    q: str,
+    window_hours: float,
+    pages: int,
+    top_limit: int,
+    min_count: int,
+    body: dict[str, Any],
+) -> None:
+    """Record ``body`` in the result cache under these parameters, stamped with the current time."""
+    params = dict(
+        q=q,
+        window_hours=window_hours,
+        pages=pages,
+        top_limit=top_limit,
+        min_count=min_count,
+    )
+    _RESULT_CACHE[_cache_key(**params)] = (time.time(), body)
+
+
+async def _run_job(job: EbaySoldTopJob, app: AppSettings) -> None:
+    """
+    Execute the scrape + aggregation for ``job``, mutating it in place.
+
+    The job moves from ``running`` to either ``completed`` or ``failed`` and
+    never stays in ``running`` once this coroutine returns normally:
+
+    * an exception raised by the scrape **or** by the aggregation marks the
+      job ``failed`` with an error message;
+    * a scrape that reports an error and returned no item is ``failed``;
+    * everything else is ``completed`` (possibly with ``job.error`` set for a
+      partial scrape).
+
+    Only error-free results are written to the shared result cache.
+    """
+    job.status = "running"
+    job.started_at = time.time()
+    job.updated_at = job.started_at
+
+    async def _on_page_done(page_num: int, total_observed: int) -> None:
+        # Progress hook invoked by the scraper after each page.
+        job.pages_done = page_num
+        job.total_observed = total_observed
+        job.updated_at = time.time()
+
+    def _fail(message: str) -> None:
+        # Terminal failure: record the message and close the job timestamps.
+        job.status = "failed"
+        job.error = message
+        job.completed_at = time.time()
+        job.updated_at = job.completed_at
+
+    try:
+        items, err = await scrape_sold_listings(
+            q=job.q,
+            window_hours=job.window_hours,
+            limit=job.scrape_limit,
+            pages=job.pages,
+            app=app,
+            on_page_done=_on_page_done,
+        )
+    except Exception as exc:
+        logger.exception("eBay sold-top job %s crashed during scrape", job.job_id)
+        _fail(f"Erreur interne pendant le scrape : {exc}")
+        return
+
+    # Aggregation runs inside the background task too: an uncaught exception
+    # here would otherwise leave the job in ``running`` until it expires.
+    try:
+        grouped = aggregate_top_sold(
+            items, min_count=job.min_count, limit_per_category=job.top_limit,
+        )
+    except Exception as exc:
+        logger.exception("eBay sold-top job %s crashed during aggregation", job.job_id)
+        _fail(f"Erreur interne pendant l'agrégation : {exc}")
+        return
+
+    # Return the full window-filtered listings (the scraper already caps them
+    # at ``scrape_limit``). The frontend renders these as the « Liste des
+    # ventes » view, so the user gets list + tops from a single scrape — no
+    # second eBay roundtrip when switching tabs.
+    body = {
+        "query": job.q,
+        "window_hours": job.window_hours,
+        "pages_requested": job.pages,
+        "total_observed": len(items),
+        "items": items,
+        "cards": grouped["cards"],
+        "graded": grouped["graded"],
+        "sealed": grouped["sealed"],
+        "groups_count": {
+            "cards": len(grouped["cards"]),
+            "graded": len(grouped["graded"]),
+            "sealed": len(grouped["sealed"]),
+        },
+        "source": "ebay_html_scrape_aggregated",
+    }
+    job.result = body
+    job.error = err
+    job.total_observed = len(items)
+    if err is not None and not items:
+        job.status = "failed"
+    else:
+        job.status = "completed"
+        # Only cache full successes — partial scrapes (with err set) shouldn't
+        # be served to other users until eBay un-blocks us.
+        if err is None:
+            _store_cached_result(
+                q=job.q,
+                window_hours=job.window_hours,
+                pages=job.pages,
+                top_limit=job.top_limit,
+                min_count=job.min_count,
+                body=body,
+            )
+    job.completed_at = time.time()
+    job.updated_at = job.completed_at
+
+
+#: Strong references to in-flight job tasks. The event loop only keeps weak
+#: references to tasks, so a task nobody else references may be garbage
+#: collected before it finishes; each task removes itself once done.
+_BACKGROUND_TASKS: set[asyncio.Task[None]] = set()
+
+
+def submit_job(
+    *,
+    user_id: int,
+    q: str,
+    window_hours: float,
+    pages: int,
+    scrape_limit: int,
+    top_limit: int,
+    min_count: int,
+    app: AppSettings,
+) -> EbaySoldTopJob:
+    """
+    Create a job and schedule its execution on the running event loop.
+
+    If a fresh cached result exists for the same parameters, the returned
+    job is created already in ``completed`` state with the cached body —
+    the caller can short-circuit polling and surface it immediately.
+
+    Must be called while an event loop is running (``asyncio.create_task``
+    raises ``RuntimeError`` otherwise). Expired jobs are purged on every call.
+    """
+    _gc_jobs()
+
+    job = EbaySoldTopJob(
+        job_id=secrets.token_urlsafe(12),
+        user_id=user_id,
+        q=q.strip(),
+        window_hours=window_hours,
+        pages=pages,
+        scrape_limit=scrape_limit,
+        top_limit=top_limit,
+        min_count=min_count,
+    )
+
+    cached = _peek_cached_result(
+        q=q, window_hours=window_hours, pages=pages,
+        top_limit=top_limit, min_count=min_count,
+    )
+    if cached is not None:
+        # Cache hit: publish an already-finished job, no scrape is started.
+        now = time.time()
+        job.status = "completed"
+        job.result = cached
+        job.pages_done = pages
+        job.total_observed = int(cached.get("total_observed") or 0)
+        job.started_at = now
+        job.completed_at = now
+        job.updated_at = now
+        _JOBS[job.job_id] = job
+        return job
+
+    _JOBS[job.job_id] = job
+    # Hold a reference until completion so the task cannot be collected
+    # mid-run (see the asyncio.create_task documentation).
+    task = asyncio.create_task(_run_job(job, app))
+    _BACKGROUND_TASKS.add(task)
+    task.add_done_callback(_BACKGROUND_TASKS.discard)
+    return job
+
+
+def get_job(job_id: str) -> EbaySoldTopJob | None:
+    """Purge expired jobs, then return the job registered under ``job_id`` (``None`` if absent)."""
+    _gc_jobs()
+    try:
+        return _JOBS[job_id]
+    except KeyError:
+        return None
+
+
+def peek_items_sample(*, q: str, window_hours: float) -> list[dict[str, Any]] | None:
+    """
+    Return the ``items`` list of the most recently cached top result for
+    ``(q, window_hours)``, whatever ``pages`` / ``top_limit`` / ``min_count``
+    it was computed with.
+
+    Entries whose ``items`` is missing, empty or not a list are ignored.
+    Returns ``None`` when nothing usable is cached.
+
+    Kept for the legacy ``/sold-scrape`` route, which is no longer driven by
+    the frontend but still reachable; once that route is removed entirely
+    this helper can go too.
+    """
+    _gc_cache()
+    target_q = q.strip().lower()
+    target_window = float(window_hours)
+
+    usable: list[tuple[float, list[dict[str, Any]]]] = []
+    for (cached_q, cached_window, _pages, _top, _min), (stored_at, body) in _RESULT_CACHE.items():
+        if not (cached_q == target_q and cached_window == target_window):
+            continue
+        sample = body.get("items")
+        if stored_at > 0.0 and isinstance(sample, list) and sample:
+            usable.append((stored_at, sample))
+
+    if not usable:
+        return None
+    # ``max`` keeps the first entry on equal timestamps, like a strict ">" scan.
+    return max(usable, key=lambda pair: pair[0])[1]
diff --git a/web/app/components/market/GoupixDexEbaySoldCard.vue b/web/app/components/market/GoupixDexEbaySoldCard.vue
new file mode 100644
index 0000000..38c67ad
--- /dev/null
+++ b/web/app/components/market/GoupixDexEbaySoldCard.vue
@@ -0,0 +1,97 @@
+
+
+
+
![]()
+
+
+
+
+
+ Vendu
+
+
+
+
+
+
+ {{ row.title }}
+
+
+ {{ priceFormatted }}
+
+
+
+
+
+
+ {{ soldLabel }}
+
+
+
+
+
+ Voir l'annonce
+
+
+
+
+
+
+
diff --git a/web/app/components/market/GoupixDexEbaySoldCardSkeleton.vue b/web/app/components/market/GoupixDexEbaySoldCardSkeleton.vue
new file mode 100644
index 0000000..4ab1251
--- /dev/null
+++ b/web/app/components/market/GoupixDexEbaySoldCardSkeleton.vue
@@ -0,0 +1,24 @@
+
+
+
+
+
diff --git a/web/app/components/market/GoupixDexEbaySoldTopRow.vue b/web/app/components/market/GoupixDexEbaySoldTopRow.vue
new file mode 100644
index 0000000..8ebb326
--- /dev/null
+++ b/web/app/components/market/GoupixDexEbaySoldTopRow.vue
@@ -0,0 +1,97 @@
+
+
+
+ {{ row.rank }}
+
+
+
+
![]()
+
+
+
+
+
+
+
+ {{ row.display_title }}
+
+
+
+
+ {{ row.grade }}
+
+
+
+ médiane {{ medianFormatted }}
+
+
+
+ dernière vente {{ approxRecentLabel }}
+
+
+
+
+
+
+ {{ row.count }} {{ row.count > 1 ? 'ventes' : 'vente' }}
+
+
+ {{ totalFormatted }} cumulés
+
+
+ Voir un exemplaire
+
+
+
+
+
+
diff --git a/web/app/components/market/GoupixDexEbaySoldTopSkeleton.vue b/web/app/components/market/GoupixDexEbaySoldTopSkeleton.vue
new file mode 100644
index 0000000..a88806e
--- /dev/null
+++ b/web/app/components/market/GoupixDexEbaySoldTopSkeleton.vue
@@ -0,0 +1,28 @@
+
+
+
+
+
diff --git a/web/app/composables/useApiError.ts b/web/app/composables/useApiError.ts
index 3a54a8b..31e87bd 100644
--- a/web/app/composables/useApiError.ts
+++ b/web/app/composables/useApiError.ts
@@ -5,10 +5,16 @@
* @returns {string} Single-line message suitable for toasts / inline alerts.
*/
export function apiErrorMessage(e: unknown): string {
- if (!e || typeof e !== 'object' || !('response' in e)) {
- return 'Erreur réseau'
+ if (!e || typeof e !== 'object') {
+ return 'Erreur inconnue'
}
- const data = (e as { response?: { data?: { detail?: unknown } } }).response?.data
+ const err = e as {
+ message?: string
+ code?: string
+ response?: { status?: number; data?: { detail?: unknown } }
+ }
+
+ const data = err.response?.data
const d = data?.detail
if (typeof d === 'string') {
return d
@@ -18,5 +24,24 @@ export function apiErrorMessage(e: unknown): string {
.map((x) => (typeof x === 'object' && x && 'msg' in x ? String((x as { msg: string }).msg) : JSON.stringify(x)))
.join(' · ')
}
+
+ if (!err.response) {
+ const msg = typeof err.message === 'string' ? err.message.trim() : ''
+ if (msg) {
+ const head =
+ err.code === 'ECONNREFUSED' || err.code === 'ERR_NETWORK' || err.code === 'ECONNABORTED'
+ ? 'Impossible de joindre l’API'
+ : err.code === 'ERR_CANCELED'
+ ? 'Requête annulée'
+ : 'Erreur réseau'
+ return `${head} — ${msg}`
+ }
+ return 'Erreur réseau (aucune réponse du serveur). Vérifiez que l’API tourne et que NUXT_PUBLIC_API_BASE est correct.'
+ }
+
+ const status = err.response.status
+ if (typeof status === 'number') {
+ return `Erreur HTTP ${status}`
+ }
return 'Erreur'
}
diff --git a/web/app/composables/useEbaySoldScrape.ts b/web/app/composables/useEbaySoldScrape.ts
new file mode 100644
index 0000000..65adb9d
--- /dev/null
+++ b/web/app/composables/useEbaySoldScrape.ts
@@ -0,0 +1,87 @@
+import type { Ref } from 'vue'
+import type { AxiosInstance } from 'axios'
+
+export interface EbaySoldScrapeRow {
+ title: string
+ price_eur: number | null
+ listing_url: string
+ image_url: string | null
+ item_id: string | null
+ sold_caption: string | null
+ approx_hours_ago: number | null
+}
+
+export interface EbaySoldScrapeResponse {
+ query: string
+ window_hours: number
+ items: EbaySoldScrapeRow[]
+ error: string | null
+ ebay_sold_search_url: string
+ source: string
+}
+
+export interface EbaySoldScrapeInput {
+ q: string
+ windowHours: number
+ limit?: number
+}
+
+/**
+ * eBay France sold-listings scrape (`GET /ebay/market/sold-scrape`) — public HTML
+ * search, no Marketplace Insights OAuth.
+ *
+ * @returns Reactive `loading` / `error` / `result`, plus `load` and `reset`.
+ */
+export function useEbaySoldScrape() {
+  const { $api } = useNuxtApp() as unknown as { $api: AxiosInstance }
+
+  const loading: Ref<boolean> = ref(false)
+  const error: Ref<string | null> = ref(null)
+  const result: Ref<EbaySoldScrapeResponse | null> = ref(null)
+
+  /**
+   * Run a sold-listings scrape; stores `result` or sets `error` on failure.
+   *
+   * A payload that carries its own `error` field is still stored in `result`
+   * and returned; the message is mirrored into `error`.
+   *
+   * @param input - Query text, window in hours, and optional row limit (defaults to 50).
+   * @returns {Promise<EbaySoldScrapeResponse | null>} API payload on success, or `null` after error handling.
+   */
+  async function load(input: EbaySoldScrapeInput): Promise<EbaySoldScrapeResponse | null> {
+    loading.value = true
+    error.value = null
+    try {
+      const { data } = await $api.get<EbaySoldScrapeResponse>('/ebay/market/sold-scrape', {
+        params: {
+          q: input.q.trim(),
+          window_hours: input.windowHours,
+          limit: input.limit ?? 50,
+        },
+        timeout: 120_000,
+      })
+      result.value = data
+      if (data.error) {
+        error.value = data.error
+      }
+      return data
+    } catch (err: unknown) {
+      const msg = apiErrorMessage(err)
+      error.value = msg
+      result.value = null
+      return null
+    } finally {
+      loading.value = false
+    }
+  }
+
+  /**
+   * Clear `result`, `error`, and `loading` — e.g. when leaving the page.
+   *
+   * @returns {void} Nothing.
+   */
+  function reset(): void {
+    result.value = null
+    error.value = null
+    loading.value = false
+  }
+
+  return { loading, error, result, load, reset }
+}
diff --git a/web/app/composables/useEbaySoldTop.ts b/web/app/composables/useEbaySoldTop.ts
new file mode 100644
index 0000000..0ac8282
--- /dev/null
+++ b/web/app/composables/useEbaySoldTop.ts
@@ -0,0 +1,274 @@
+import type { Ref } from 'vue'
+import type { AxiosInstance } from 'axios'
+
+export type EbaySoldTopCategory = 'cards' | 'graded' | 'sealed'
+export type EbaySoldTopStatus = 'idle' | 'pending' | 'running' | 'completed' | 'failed'
+
+export interface EbaySoldTopRow {
+ rank: number
+ category: EbaySoldTopCategory
+ fingerprint: string
+ grade: string | null
+ display_title: string
+ image_url: string | null
+ sample_listing_url: string | null
+ count: number
+ total_value_eur: number
+ median_price_eur: number | null
+ min_price_eur: number | null
+ max_price_eur: number | null
+ approx_hours_min: number | null
+}
+
+/** Raw sold listing — same shape as ``EbaySoldScrapeRow``. */
+export interface EbaySoldTopItem {
+ title: string
+ price_eur: number | null
+ listing_url: string
+ image_url: string | null
+ item_id: string | null
+ sold_caption: string | null
+ approx_hours_ago: number | null
+}
+
+export interface EbaySoldTopResultBody {
+ query: string
+ window_hours: number
+ pages_requested: number
+ total_observed: number
+ /** All in-window listings, recency-ordered. Drives the List view. */
+ items: EbaySoldTopItem[]
+ cards: EbaySoldTopRow[]
+ graded: EbaySoldTopRow[]
+ sealed: EbaySoldTopRow[]
+ groups_count: { cards: number, graded: number, sealed: number }
+ source: string
+}
+
+interface JobEnvelope {
+ job_id: string
+ status: 'pending' | 'running' | 'completed' | 'failed'
+ query: string
+ window_hours: number
+ pages_requested: number
+ pages_done: number
+ total_observed: number
+ result: EbaySoldTopResultBody | null
+ error: string | null
+ created_at: number
+ updated_at: number
+ started_at: number | null
+ completed_at: number | null
+ ebay_sold_search_url: string
+ cached?: boolean
+}
+
+export interface EbaySoldTopInput {
+ q: string
+ windowHours: number
+ pages?: number
+ scrapeLimit?: number
+ topLimit?: number
+ minCount?: number
+}
+
+const POLL_INTERVAL_MS = 800
+/** Maximum number of non-terminal polls per job before giving up with a timeout (each poll is spaced by POLL_INTERVAL_MS). */
+const POLL_MAX_TICKS = 200
+
+/**
+ * Top sold cards / graded cards / sealed items on eBay.fr via the background
+ * worker: ``POST /ebay/market/sold-top`` then polling ``GET /ebay/market/sold-top/{job_id}``.
+ *
+ * Exposes live progress (``pagesDone``, ``pagesTotal``, ``totalObservedSoFar``) and
+ * guards against stale jobs (a new submit drops in-flight polls).
+ *
+ * @returns Reactive state plus ``load``, ``cancel``, and ``reset``.
+ */
+export function useEbaySoldTop() {
+  const { $api } = useNuxtApp() as unknown as { $api: AxiosInstance }
+
+  const loading: Ref<boolean> = ref(false)
+  const error: Ref<string | null> = ref(null)
+  const result: Ref<EbaySoldTopResultBody | null> = ref(null)
+  const ebaySearchUrl: Ref<string | null> = ref(null)
+
+  const status: Ref<EbaySoldTopStatus> = ref('idle')
+  const pagesDone: Ref<number> = ref(0)
+  const pagesTotal: Ref<number> = ref(0)
+  const totalObservedSoFar: Ref<number> = ref(0)
+  const cached: Ref<boolean> = ref(false)
+
+  /** Active job id — when changed by ``load``, in-flight polls are dropped. */
+  const activeJobId: Ref<string | null> = ref(null)
+  let pollTimer: ReturnType<typeof setTimeout> | null = null
+  /** Settles the promise of the ``load`` call currently polling, if any. */
+  let settlePending: ((value: EbaySoldTopResultBody | null) => void) | null = null
+
+  function clearPollTimer(): void {
+    if (pollTimer !== null) {
+      clearTimeout(pollTimer)
+      pollTimer = null
+    }
+  }
+
+  /**
+   * Stops the scheduled poll and resolves the pending ``load`` promise with
+   * ``null``. Clearing the timer alone would leave that promise pending
+   * forever, because the poll that was meant to settle it never runs.
+   */
+  function abandonPendingPoll(): void {
+    clearPollTimer()
+    const settle = settlePending
+    settlePending = null
+    settle?.(null)
+  }
+
+  function applyEnvelope(env: JobEnvelope): void {
+    pagesDone.value = env.pages_done
+    pagesTotal.value = env.pages_requested
+    totalObservedSoFar.value = env.total_observed
+    status.value = env.status
+    ebaySearchUrl.value = env.ebay_sold_search_url ?? null
+    if (env.result) {
+      result.value = env.result
+    }
+    if (env.error) {
+      error.value = env.error
+    }
+  }
+
+  /**
+   * Submits a new job (cancels any in-flight job) and polls until completion.
+   *
+   * @param input - Search parameters.
+   * @returns {Promise<EbaySoldTopResultBody | null>} Final payload or ``null`` on error / cancelled.
+   */
+  async function load(input: EbaySoldTopInput): Promise<EbaySoldTopResultBody | null> {
+    abandonPendingPoll()
+    activeJobId.value = null
+    loading.value = true
+    error.value = null
+    result.value = null
+    status.value = 'pending'
+    pagesDone.value = 0
+    pagesTotal.value = input.pages ?? 10
+    totalObservedSoFar.value = 0
+    cached.value = false
+
+    let env: JobEnvelope
+    try {
+      const { data } = await $api.post<JobEnvelope>(
+        '/ebay/market/sold-top',
+        {
+          q: input.q.trim(),
+          window_hours: input.windowHours,
+          pages: input.pages ?? 10,
+          scrape_limit: input.scrapeLimit ?? 600,
+          top_limit: input.topLimit ?? 20,
+          min_count: input.minCount ?? 1
+        },
+        { timeout: 30_000 }
+      )
+      env = data
+    } catch (err: unknown) {
+      error.value = apiErrorMessage(err)
+      loading.value = false
+      status.value = 'failed'
+      return null
+    }
+
+    const jobId = env.job_id
+    activeJobId.value = jobId
+    cached.value = !!env.cached
+    applyEnvelope(env)
+
+    if (env.status === 'completed' || env.status === 'failed') {
+      loading.value = false
+      return env.result
+    }
+
+    return new Promise<EbaySoldTopResultBody | null>((resolve) => {
+      let ticksLeft = POLL_MAX_TICKS
+
+      // Single exit point: unregisters itself so ``abandonPendingPoll`` never
+      // touches a promise that has already been settled.
+      const settle = (value: EbaySoldTopResultBody | null): void => {
+        if (settlePending === settle) {
+          settlePending = null
+        }
+        resolve(value)
+      }
+      settlePending = settle
+
+      const poll = async (): Promise<void> => {
+        if (jobId !== activeJobId.value) {
+          settle(null)
+          return
+        }
+        try {
+          const { data } = await $api.get<JobEnvelope>(`/ebay/market/sold-top/${jobId}`, {
+            timeout: 15_000
+          })
+          if (jobId !== activeJobId.value) {
+            settle(null)
+            return
+          }
+          applyEnvelope(data)
+          if (data.status === 'completed' || data.status === 'failed') {
+            loading.value = false
+            settle(data.result)
+            return
+          }
+          ticksLeft -= 1
+          if (ticksLeft <= 0) {
+            loading.value = false
+            // Without this the UI would keep showing a running job that
+            // nobody is polling any more.
+            status.value = 'failed'
+            error.value = error.value ?? 'Timeout — the job is taking too long to finish.'
+            settle(result.value)
+            return
+          }
+          pollTimer = setTimeout(() => {
+            void poll()
+          }, POLL_INTERVAL_MS)
+        } catch (err: unknown) {
+          if (jobId !== activeJobId.value) {
+            settle(null)
+            return
+          }
+          error.value = apiErrorMessage(err)
+          loading.value = false
+          status.value = 'failed'
+          settle(null)
+        }
+      }
+
+      pollTimer = setTimeout(() => {
+        void poll()
+      }, POLL_INTERVAL_MS)
+    })
+  }
+
+  /**
+   * Stops polling and forgets the current job. The server job may keep running
+   * (and may still populate the TTL cache for later submits); we simply stop
+   * waiting for its result. A pending ``load`` promise resolves with ``null``.
+   *
+   * @returns {void} Nothing.
+   */
+  function cancel(): void {
+    abandonPendingPoll()
+    activeJobId.value = null
+    if (status.value === 'pending' || status.value === 'running') {
+      status.value = 'idle'
+    }
+    loading.value = false
+  }
+
+  /**
+   * Resets all state (useful when unmounting the page).
+   *
+   * @returns {void} Nothing.
+   */
+  function reset(): void {
+    cancel()
+    result.value = null
+    error.value = null
+    ebaySearchUrl.value = null
+    pagesDone.value = 0
+    pagesTotal.value = 0
+    totalObservedSoFar.value = 0
+    cached.value = false
+    status.value = 'idle'
+  }
+
+  return {
+    loading,
+    error,
+    result,
+    ebaySearchUrl,
+    status,
+    pagesDone,
+    pagesTotal,
+    totalObservedSoFar,
+    cached,
+    load,
+    cancel,
+    reset
+  }
+}
diff --git a/web/app/composables/useMarketListingPrefill.ts b/web/app/composables/useMarketListingPrefill.ts
new file mode 100644
index 0000000..7e993db
--- /dev/null
+++ b/web/app/composables/useMarketListingPrefill.ts
@@ -0,0 +1,134 @@
+import type { MarketListing } from '~/composables/useMarketSearch'
+
+/**
+ * Upper-cased title tokens that `parseCardInfoFromTitle` must never pick as
+ * the Pokémon name. Lookups are done on `token.toUpperCase()`, so every entry
+ * is stored in upper case (accented and unaccented spellings both listed).
+ */
+const EXCLUDED_TOKENS = new Set([
+  // Item state / condition words
+  'NEW', 'SEALED', 'SCELLE', 'SCELLÉ', 'MINT', 'NM', 'EX', 'EXCELLENT',
+  // Language and country markers
+  'FR', 'FRENCH', 'FRANCAIS', 'FRANÇAIS', 'FRANCE',
+  'ENGLISH', 'ENG',
+  'JP', 'JAPAN', 'JAPONAIS',
+  // Franchise name
+  'POKEMON', 'POKÉMON',
+  // Grading companies and grading vocabulary
+  'PSA', 'CGC', 'BGS', 'BECKETT', 'GRADED',
+  // Card mechanic suffixes ('EX' is already listed above)
+  'VMAX', 'VSTAR', 'V', 'GX', 'TAG', 'TEAM',
+])
+
+/**
+ * Map a free-text eBay condition label (French or English) onto one of the
+ * condition labels produced here: 'Mint', 'Near Mint', 'Excellent', 'Good'
+ * or 'Played'.
+ *
+ * Matching is case-insensitive and substring-based, so the order of the tests
+ * matters: a more specific phrase must be tested before any shorter phrase it
+ * contains ("like new" before "new", "très bon" before "bon").
+ *
+ * @param ebayCondition - Condition text as shown on the eBay listing (may be empty).
+ * @param isGraded - Whether the listing is a graded card; graded cards are always 'Mint'.
+ * @returns The mapped condition label; 'Near Mint' when nothing matches.
+ */
+export function mapMarketCondition(ebayCondition: string, isGraded: boolean): string {
+  if (isGraded) {
+    return 'Mint'
+  }
+  const c = (ebayCondition || '').toLowerCase()
+  // Must run before the generic "new"/"neuf" test below: "like new" contains
+  // "new" and "comme neuf" contains "neuf", which would otherwise win.
+  if (c.includes('like new') || c.includes('comme neuf')) {
+    return 'Near Mint'
+  }
+  if (c.includes('new') || c.includes('neuf') || c.includes('scellé') || c.includes('scelle')) {
+    return 'Mint'
+  }
+  // "très bon" is tested before "bon" for the same containment reason.
+  if (c.includes('excellent') || c.includes('très bon')) {
+    return 'Excellent'
+  }
+  if (c.includes('good') || c.includes('bon')) {
+    return 'Good'
+  }
+  if (c.includes('played') || c.includes('acceptable')) {
+    return 'Played'
+  }
+  return 'Near Mint'
+}
+
+/**
+ * Build the multi-line description text for an article created from an eBay
+ * listing: the title, then (after a blank line) the eBay condition when
+ * present, then a grading line when the listing is graded.
+ *
+ * @param listing - The market listing to describe.
+ * @returns The lines joined with `\n`, trimmed at both ends.
+ */
+export function buildMarketListingDescription(listing: MarketListing): string {
+  const { condition, graded } = listing
+  // The empty string yields the blank separator line once joined.
+  const conditionLines = condition ? ['', `État eBay : ${condition}`] : []
+  const gradedLines = graded
+    ? [`Gradée ${graded.grader}${graded.grade ? ` ${graded.grade}` : ''}`]
+    : []
+  return [listing.title, ...conditionLines, ...gradedLines].join('\n').trim()
+}
+
+/**
+ * Best-effort extraction of card metadata from a listing title.
+ *
+ * - `cardNumber`: the part before the slash of the first "NN/NNN" pattern.
+ * - `setCode`: the first token shaped like a known set prefix followed by
+ *   digits (SWSH, SV, SM, BW, XY, EB, EV, BK/BKS), upper-cased.
+ * - `pokemonName`: the first remaining token that starts with an upper-case
+ *   letter, is at least 3 characters long, is not purely numeric, is not in
+ *   `EXCLUDED_TOKENS` and is not the set code itself.
+ *
+ * Any field that cannot be found is returned as an empty string.
+ *
+ * @param title - Raw listing title.
+ * @returns The extracted name, set code and card number.
+ */
+export function parseCardInfoFromTitle(title: string): {
+  pokemonName: string
+  setCode: string
+  cardNumber: string
+} {
+  const result = { pokemonName: '', setCode: '', cardNumber: '' }
+  const numberMatch = title.match(/\b(\d{1,3})\s*\/\s*(\d{1,3})\b/)
+  if (numberMatch) {
+    result.cardNumber = numberMatch[1]!
+  }
+  const setMatch = title.match(
+    /\b(SWSH\d{1,3}[a-z]?|SV\d{1,3}[a-z]?|SM\d{1,3}[a-z]?|BW\d{1,3}[a-z]?|XY\d{1,3}[a-z]?|EB\d{1,3}[a-z]?|EV\d{1,3}[a-z]?|BKS?\d{1,3}[a-z]?)\b/i,
+  )
+  if (setMatch) {
+    result.setCode = setMatch[1]!.toUpperCase()
+  }
+  // Replace punctuation with spaces (letters, digits, whitespace and hyphens
+  // are kept) so that tokens can be split on whitespace.
+  const cleaned = title.replace(/[^\p{L}\p{N}\s-]+/gu, ' ')
+  const tokens = cleaned.split(/\s+/).filter(Boolean)
+  for (const tok of tokens) {
+    const upper = tok.toUpperCase()
+    if (EXCLUDED_TOKENS.has(upper)) {
+      continue
+    }
+    // The set code was captured above; without this guard a title such as
+    // "SWSH12 Pikachu 25/100" would report "SWSH12" as the Pokémon name.
+    if (result.setCode && upper === result.setCode) {
+      continue
+    }
+    if (/^\d+$/.test(tok)) {
+      continue
+    }
+    if (tok.length < 3) {
+      continue
+    }
+    // Only capitalised tokens are accepted as a name.
+    if (/^[A-ZÀ-ÖØ-Ý]/u.test(tok[0] ?? '')) {
+      result.pokemonName = tok
+      break
+    }
+  }
+  return result
+}
+
+/**
+ * Build the key/value payload used to prefill a new article from an eBay
+ * market listing. Every value is a string; optional keys (`description`,
+ * `pokemon_name`, `set_code`, `card_number`, `image_url`) are only present
+ * when a non-empty value is available.
+ *
+ * @param listing - The market listing selected by the user.
+ * @returns A flat string-to-string record with the prefill fields.
+ */
+export function buildArticlePrefillFromListing(listing: MarketListing): Record<string, string> {
+  // `Record` needs explicit key/value type arguments to be valid TypeScript.
+  const payload: Record<string, string> = {
+    title: listing.title,
+    purchase_price: String(listing.price_eur),
+    condition: mapMarketCondition(listing.condition, !!listing.graded),
+  }
+  const description = buildMarketListingDescription(listing)
+  if (description) {
+    payload.description = description
+  }
+  // Fields parsed from the title are copied only when something was found.
+  const parsed = parseCardInfoFromTitle(listing.title)
+  if (parsed.pokemonName) {
+    payload.pokemon_name = parsed.pokemonName
+  }
+  if (parsed.setCode) {
+    payload.set_code = parsed.setCode
+  }
+  if (parsed.cardNumber) {
+    payload.card_number = parsed.cardNumber
+  }
+  if (listing.image_url) {
+    payload.image_url = listing.image_url
+  }
+  payload.source_url = listing.listing_url
+  return payload
+}
diff --git a/web/app/layouts/default.vue b/web/app/layouts/default.vue
index a0eb601..14888ff 100644
--- a/web/app/layouts/default.vue
+++ b/web/app/layouts/default.vue
@@ -136,6 +136,14 @@ const links: ComputedRef = computed(() => {
open.value = false
},
},
+ {
+ label: 'Ventes terminées eBay',
+ icon: 'i-simple-icons-ebay',
+ to: '/top-ventes-ebay',
+ onSelect: () => {
+ open.value = false
+ },
+ },
{
label: 'Invitations Amazon',
icon: 'i-simple-icons-amazon',
diff --git a/web/app/pages/top-ventes-ebay.vue b/web/app/pages/top-ventes-ebay.vue
new file mode 100644
index 0000000..e7f0b19
--- /dev/null
+++ b/web/app/pages/top-ventes-ebay.vue
@@ -0,0 +1,502 @@
+
+
+
+
+
+
+
+
+ Prix du marché
+
+
+
+
+
+
+
+
+
+
+
+
+
Ventes terminées · eBay France
+
+ Cartes et lots récemment vendus sur eBay France
+
+
+ Explorez les ventes récentes correspondant à votre recherche : dernier prix affiché et période au choix
+ (24 h à 30 jours), puis comparez aussi les tops les plus fréquents.
+
+
+
+
+
+
+
+
+
+
+
+
+
Paramètres de recherche
+
+ Même recherche et même fenêtre : résultat immédiat si disponible (voir indicateur sous les résultats).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{ loading ? 'Recherche…' : 'Rechercher' }}
+
+
+
+
+
+
+
+ {{ topModeHint }}
+
+
+
+
+
+
+
+
+ Ouvrir la même recherche « vendus » sur eBay.fr
+
+
+
+
+
+
+
+
+
+
+
+ {{ progressLabel }}
+
+ {{ progressPercent }}%
+
+
+
+ {{ topScrape.totalObservedSoFar.value }} ventes uniques analysées —
+ regroupement à la fin de l'analyse.
+
+
+
+
+
+
+
+ {{ listRows.length }} vente(s) dans la fenêtre. Tri d'origine eBay : terminées récemment.
+
+ · servi depuis le cache
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Tapez votre recherche pour démarrer
+
+ Exemple : « Prismatic Evolutions ETB »,
+ « Pikachu VMAX PSA 10 » ou
+ « Charizard 4/102 ».
+
+
+
+
+
+
+
+
+
+
+ Top {{ currentTopRows.length }} {{ currentTopLabel }} — agrégés sur
+ {{ topResult?.total_observed ?? 0 }} ventes analysées
+
+ · servi depuis le cache
+
+
+
+
+
+ {{ topResult?.groups_count.cards ?? 0 }} cartes
+
+
+
+ {{ topResult?.groups_count.graded ?? 0 }} gradées
+
+
+
+ {{ topResult?.groups_count.sealed ?? 0 }} scellés
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
{{ topEmptyHeadline }}
+
+ {{ topEmptyHint }}
+
+
+ Calculer le top pour cette recherche
+
+
+
+
+
+
+
+
+
diff --git a/web/app/utils/ebayImage.ts b/web/app/utils/ebayImage.ts
new file mode 100644
index 0000000..50b193a
--- /dev/null
+++ b/web/app/utils/ebayImage.ts
@@ -0,0 +1,70 @@
+/**
+ * eBay's image CDN (``i.ebayimg.com``) serves the same picture at multiple
+ * sizes by swapping the ``s-lNNN`` suffix in the path:
+ *
+ *   https://i.ebayimg.com/images/g/{id}/s-l140.jpg  ← thumbnail (default in search HTML)
+ *   https://i.ebayimg.com/images/g/{id}/s-l500.jpg  ← medium
+ *   https://i.ebayimg.com/images/g/{id}/s-l1600.jpg ← large
+ *
+ * The HTML scrape returns the small thumbnail by default — fine for tiny
+ * lists, blurry for the big card grid. These helpers rewrite the URL to a
+ * larger variant so the components stay sharp on retina displays.
+ *
+ * Cached sizes commonly available: 64, 96, 140, 180, 225, 300, 400, 500,
+ * 640, 800, 1000, 1200, 1600. We clamp to that range to avoid 404s.
+ */
+
+// Smallest and largest `s-lNNN` sizes we ever request.
+const _MIN_WIDTH = 64
+const _MAX_WIDTH = 1600
+// Width used when the caller passes none, or a value that is not a number.
+const _DEFAULT_WIDTH = 500
+// Trailing "/s-l<digits>.<ext>" path segment with an optional "?query".
+// Capture 1 is the extension (dot included), capture 2 the query string.
+const _SIZE_RX = /\/s-l\d+(\.[a-zA-Z]{2,5})(\?.*)?$/
+
+/**
+ * Rewrite an eBay image URL to target a specific pixel width.
+ *
+ * Only the trailing `/s-lNNN.ext` segment is rewritten; the host is not
+ * inspected. URLs that do not end with such a segment come back unchanged,
+ * so this is safe to apply blindly.
+ *
+ * @param url - The original eBay image URL (or null / undefined).
+ * @param targetWidth - Desired pixel width; rounded and clamped to
+ *   [64, 1600]. `NaN` falls back to the default of 500.
+ * @returns The upgraded URL, or `null` when the input was falsy.
+ */
+export function upgradeEbayImage(
+  url: string | null | undefined,
+  targetWidth = _DEFAULT_WIDTH
+): string | null {
+  if (!url) {
+    return null
+  }
+  // NaN would survive Math.min/Math.max and produce "/s-lNaN.jpg".
+  const requested = Number.isNaN(targetWidth) ? _DEFAULT_WIDTH : targetWidth
+  const safe = Math.min(_MAX_WIDTH, Math.max(_MIN_WIDTH, Math.round(requested)))
+  // String.replace leaves the URL untouched when the pattern does not match.
+  return url.replace(_SIZE_RX, (_match, ext: string, query: string | undefined) => {
+    return `/s-l${safe}${ext}${query ?? ''}`
+  })
+}
+
+/**
+ * Build a ``srcset`` string offering a 1x and a 2x candidate so the browser
+ * can pick the sharper image on high-density displays. The 2x candidate is
+ * omitted when it resolves to the same URL as the 1x one.
+ *
+ * @param url - The original eBay image URL.
+ * @param baseWidth - The 1x target width (the 2x variant is twice as large, clamped).
+ * @returns A `srcset` value usable on an `img` element, or `''` if the input was falsy.
+ */
+export function ebayImageSrcset(
+  url: string | null | undefined,
+  baseWidth = 500
+): string {
+  if (!url) {
+    return ''
+  }
+  const standard = upgradeEbayImage(url, baseWidth)
+  const retina = upgradeEbayImage(url, baseWidth * 2)
+  if (!standard) {
+    return ''
+  }
+  const candidates = [`${standard} 1x`]
+  if (retina && retina !== standard) {
+    candidates.push(`${retina} 2x`)
+  }
+  return candidates.join(', ')
+}
diff --git a/web/app/utils/relativeTime.ts b/web/app/utils/relativeTime.ts
new file mode 100644
index 0000000..aa734e4
--- /dev/null
+++ b/web/app/utils/relativeTime.ts
@@ -0,0 +1,40 @@
+/**
+ * Format a delta in hours as a human-readable French relative time
+ * (« il y a 2 min », « il y a 3 h », « il y a 5 j », « il y a 2 sem »…).
+ *
+ * Returns an empty string when the input is null / undefined / non-finite
+ * or negative, so templates can safely render the result with a ``v-if`` on
+ * truthiness.
+ *
+ * The unit is chosen from the *rounded* value, so a delta that rounds up to a
+ * whole larger unit is shown in that unit (59.9 min → « il y a 1 h »,
+ * 23.9 h → « il y a 1 j ») rather than as « 60 min » or « 24 h ».
+ *
+ * @param hours - Number of hours since the event (>= 0).
+ * @returns A non-empty French label, or `''` if the input is unusable.
+ */
+export function formatRelativeHours(hours: number | null | undefined): string {
+  if (hours == null || !Number.isFinite(hours) || hours < 0) {
+    return ''
+  }
+  if (hours < 1 / 60) {
+    return 'à l\'instant'
+  }
+  // Decide on the rounded minute count so we never print "60 min".
+  const mins = Math.round(hours * 60)
+  if (mins < 60) {
+    return `il y a ${Math.max(1, mins)} min`
+  }
+  // Same idea for hours: 23.5 h and above rounds to a full day.
+  const h = Math.round(hours)
+  if (h < 24) {
+    return `il y a ${Math.max(1, h)} h`
+  }
+  const days = Math.max(1, Math.round(hours / 24))
+  if (days < 7) {
+    return `il y a ${days} j`
+  }
+  if (days < 31) {
+    const weeks = Math.round(days / 7)
+    return `il y a ${weeks} sem`
+  }
+  const months = Math.round(days / 30)
+  if (months < 12) {
+    return `il y a ${months} mois`
+  }
+  const years = Math.round(days / 365)
+  return `il y a ${years} an${years > 1 ? 's' : ''}`
+}