|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
5 | 5 | import logging |
6 | | -from typing import Annotated |
| 6 | +from typing import Annotated, Any |
7 | 7 |
|
8 | 8 | from fastapi import APIRouter, Depends, HTTPException, Query, status |
| 9 | +from pydantic import BaseModel, Field |
9 | 10 |
|
10 | 11 | from app_types.ebay_browse import ( |
11 | 12 | ConditionFilter, |
|
19 | 20 | from services.ebay_app_oauth_service import ebay_app_oauth_configured |
20 | 21 | from services.ebay_browse_service import DEFAULT_LIMIT, MAX_LIMIT, browse_search |
21 | 22 | from services.ebay_price_aggregator_service import aggregate_prices, partition_outliers |
| 23 | +from services.ebay_sold_scrape_rate_limit import acquire_sold_scrape_slot |
| 24 | +from services.ebay_sold_scrape_service import ebay_fr_sold_search_url, scrape_sold_listings |
| 25 | +from services.ebay_sold_top_service import aggregate_top_sold |
| 26 | +from services.ebay_sold_top_worker import get_job, peek_items_sample, submit_job |
22 | 27 |
|
23 | 28 | logger = logging.getLogger(__name__) |
24 | 29 |
|
@@ -159,3 +164,149 @@ async def search_market( |
159 | 164 | "total_matches": total, |
160 | 165 | "warnings": warnings, |
161 | 166 | } |
| 167 | + |
| 168 | + |
| 169 | +@router.get("/sold-scrape", response_model=None) |
| 170 | +async def sold_scrape_html( |
| 171 | + user: Annotated[User, Depends(get_current_user)], |
| 172 | + q: Annotated[str, Query(min_length=2, max_length=256)], |
| 173 | + window_hours: Annotated[float, Query(ge=1, le=720)] = 168, |
| 174 | + limit: Annotated[int, Query(ge=1, le=60)] = 50, |
| 175 | +) -> dict[str, Any]: |
| 176 | + """ |
| 177 | + **Completed listings** (sold) via **public eBay HTML search** — no Marketplace Insights OAuth. |
| 178 | +
|
| 179 | + May fail with bot protection (403); optional ``EBAY_SOLD_SCRAPE_PROXY`` in server env. |
| 180 | + Rate-limited per user (default: one call every ``EBAY_SOLD_SCRAPE_MIN_INTERVAL_SECONDS``). |
| 181 | + Window goes up to ``720`` hours (30 days). |
| 182 | + """ |
| 183 | + app = get_settings() |
| 184 | + |
| 185 | + # If the worker has a fresh cached top result for the same (q, window), |
| 186 | + # reuse its items_sample — saves an eBay roundtrip *and* the rate-limit |
| 187 | + # slot, which matters when the user just searched in Top mode and |
| 188 | + # switches to List mode. |
| 189 | + cached_sample = peek_items_sample(q=q.strip(), window_hours=window_hours) |
| 190 | + if cached_sample is not None: |
| 191 | + return { |
| 192 | + "query": q.strip(), |
| 193 | + "window_hours": window_hours, |
| 194 | + "items": cached_sample[:limit], |
| 195 | + "error": None, |
| 196 | + "ebay_sold_search_url": ebay_fr_sold_search_url( |
| 197 | + q=q.strip(), page_size=min(60, max(limit, 10)), |
| 198 | + ), |
| 199 | + "source": "ebay_html_scrape_cached_from_top", |
| 200 | + "cached": True, |
| 201 | + } |
| 202 | + |
| 203 | + retry_after = await acquire_sold_scrape_slot(user.id, app.ebay_sold_scrape_min_interval_seconds) |
| 204 | + if retry_after > 0: |
| 205 | + iv = app.ebay_sold_scrape_min_interval_seconds |
| 206 | + raise HTTPException( |
| 207 | + status_code=status.HTTP_429_TOO_MANY_REQUESTS, |
| 208 | + detail=( |
| 209 | + f"Rate limit: at most one eBay sold-search every {iv:g} s " |
| 210 | + f"(retry in {retry_after} s)." |
| 211 | + ), |
| 212 | + headers={"Retry-After": str(retry_after)}, |
| 213 | + ) |
| 214 | + items, err = await scrape_sold_listings(q=q.strip(), window_hours=window_hours, limit=limit, app=app) |
| 215 | + return { |
| 216 | + "query": q.strip(), |
| 217 | + "window_hours": window_hours, |
| 218 | + "items": items, |
| 219 | + "error": err, |
| 220 | + "ebay_sold_search_url": ebay_fr_sold_search_url(q=q.strip(), page_size=min(60, max(limit, 10))), |
| 221 | + "source": "ebay_html_scrape", |
| 222 | + "cached": False, |
| 223 | + } |
| 224 | + |
| 225 | + |
| 226 | +class SoldTopSubmitBody(BaseModel): |
| 227 | + """Body for ``POST /ebay/market/sold-top`` — schedules a background scrape.""" |
| 228 | + |
| 229 | + q: str = Field(min_length=2, max_length=256) |
| 230 | + window_hours: float = Field(default=168, ge=1, le=720) |
| 231 | + pages: int = Field(default=10, ge=1, le=20) |
| 232 | + scrape_limit: int = Field(default=600, ge=10, le=1000) |
| 233 | + top_limit: int = Field(default=20, ge=1, le=100) |
| 234 | + min_count: int = Field(default=1, ge=1, le=20) |
| 235 | + |
| 236 | + |
| 237 | +@router.post("/sold-top", response_model=None, status_code=status.HTTP_202_ACCEPTED) |
| 238 | +async def sold_top_submit( |
| 239 | + user: Annotated[User, Depends(get_current_user)], |
| 240 | + body: SoldTopSubmitBody, |
| 241 | +) -> dict[str, Any]: |
| 242 | + """ |
| 243 | + Submit a background top-sold scrape job and return its ``job_id`` |
| 244 | + (consumed via ``GET /ebay/market/sold-top/{job_id}``). |
| 245 | +
|
| 246 | + When a fresh cached result (TTL 15 min) exists for the same parameters, |
| 247 | + the job comes back already in ``status="completed"`` with its |
| 248 | + ``result`` populated — no eBay scrape triggered. The per-user rate-limit |
| 249 | + only fires when an actual scrape is launched. |
| 250 | + """ |
| 251 | + app = get_settings() |
| 252 | + job = submit_job( |
| 253 | + user_id=user.id, |
| 254 | + q=body.q.strip(), |
| 255 | + window_hours=body.window_hours, |
| 256 | + pages=body.pages, |
| 257 | + scrape_limit=body.scrape_limit, |
| 258 | + top_limit=body.top_limit, |
| 259 | + min_count=body.min_count, |
| 260 | + app=app, |
| 261 | + ) |
| 262 | + |
| 263 | + cache_hit = job.status == "completed" and job.result is not None |
| 264 | + if not cache_hit: |
| 265 | + retry_after = await acquire_sold_scrape_slot( |
| 266 | + user.id, app.ebay_sold_scrape_min_interval_seconds, |
| 267 | + ) |
| 268 | + if retry_after > 0: |
| 269 | + iv = app.ebay_sold_scrape_min_interval_seconds |
| 270 | + raise HTTPException( |
| 271 | + status_code=status.HTTP_429_TOO_MANY_REQUESTS, |
| 272 | + detail=( |
| 273 | + f"Rate limit: at most one eBay sold-search every {iv:g} s " |
| 274 | + f"(retry in {retry_after} s)." |
| 275 | + ), |
| 276 | + headers={"Retry-After": str(retry_after)}, |
| 277 | + ) |
| 278 | + |
| 279 | + return { |
| 280 | + **job.to_public(), |
| 281 | + "ebay_sold_search_url": ebay_fr_sold_search_url(q=body.q.strip(), page_size=60), |
| 282 | + "cached": cache_hit, |
| 283 | + } |
| 284 | + |
| 285 | + |
| 286 | +@router.get("/sold-top/{job_id}", response_model=None) |
| 287 | +async def sold_top_status( |
| 288 | + user: Annotated[User, Depends(get_current_user)], |
| 289 | + job_id: str, |
| 290 | +) -> dict[str, Any]: |
| 291 | + """ |
| 292 | + Return the current state of a ``sold-top`` job. |
| 293 | +
|
| 294 | + The client polls this endpoint while ``status`` is ``pending`` or |
| 295 | + ``running``. Once ``completed`` (or ``failed``), ``result`` is populated |
| 296 | + and polling can stop. A job may only be read by its creator. |
| 297 | + """ |
| 298 | + job = get_job(job_id) |
| 299 | + if job is None: |
| 300 | + raise HTTPException( |
| 301 | + status_code=status.HTTP_404_NOT_FOUND, |
| 302 | + detail="Unknown or expired job.", |
| 303 | + ) |
| 304 | + if job.user_id != user.id: |
| 305 | + raise HTTPException( |
| 306 | + status_code=status.HTTP_403_FORBIDDEN, |
| 307 | + detail="This job does not belong to you.", |
| 308 | + ) |
| 309 | + return { |
| 310 | + **job.to_public(), |
| 311 | + "ebay_sold_search_url": ebay_fr_sold_search_url(q=job.q, page_size=60), |
| 312 | + } |
0 commit comments