LEANDERANTONY
diff --git a/‎backend/app.py‎
Lines changed: 17 additions & 6 deletions b/‎backend/app.py‎
Lines changed: 17 additions & 6 deletions
diff --git a/‎backend/maintenance.py‎
Lines changed: 15 additions & 1 deletion b/‎backend/maintenance.py‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎backend/observability.py‎
Lines changed: 151 additions & 0 deletions b/‎backend/observability.py‎
Lines changed: 151 additions & 0 deletions
diff --git a/‎backend/routers/jobs.py‎
Lines changed: 10 additions & 3 deletions b/‎backend/routers/jobs.py‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎backend/routers/workspace.py‎
Lines changed: 33 additions & 2 deletions b/‎backend/routers/workspace.py‎
Lines changed: 33 additions & 2 deletions
diff --git a/‎backend/services/auth_session_service.py‎
Lines changed: 8 additions & 2 deletions b/‎backend/services/auth_session_service.py‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎backend/services/resume_builder_tools.py‎
Lines changed: 7 additions & 1 deletion b/‎backend/services/resume_builder_tools.py‎
Lines changed: 7 additions & 1 deletion
@@ -1,7 +1,7 @@
 from contextlib import asynccontextmanager
 from typing import AsyncIterator
 
-from fastapi import FastAPI, Request
+from fastapi import Depends, FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from slowapi.errors import RateLimitExceeded
@@ -16,7 +16,11 @@
 from backend.routers.auth import router as auth_router
 from backend.routers.billing import router as billing_router
 from backend.routers.health import router as health_router
-from backend.routers.jobs import admin_router as jobs_admin_router, router as jobs_router
+from backend.routers.jobs import (
+    _verify_refresh_secret,
+    admin_router as jobs_admin_router,
+    router as jobs_router,
+)
 from backend.routers.workspace import router as workspace_router
 from src.errors import QuotaExceededError
 
@@ -119,16 +123,23 @@ def root():
 
 
 @app.get("/health/sentry-debug")
-def sentry_debug() -> None:
+def sentry_debug(_: None = Depends(_verify_refresh_secret)) -> None:
     """Raise an unhandled exception so Sentry sees the issue end-to-end.
 
     Used once at deploy time to confirm the DSN, environment, and
     release tag are wired. The route returns no JSON — Sentry's
     FastAPI integration catches the raise, ships the event, and
     FastAPI's default 500 handler returns "Internal Server Error" to
-    the caller. There is intentionally no auth on this route: it must
-    be callable from anywhere with curl. Remove or gate it behind a
-    feature flag if your threat model objects.
+    the caller.
+
+    Gated behind the admin bearer secret (``_verify_refresh_secret``,
+    review L1): previously anyone could curl this to mint 500s / Sentry
+    noise on demand. Deploy-time smoke testing still works — pass the
+    ``REFRESH_CACHE_SECRET`` bearer token — but anonymous callers now get
+    a 401 (or 503 if no secret is configured) before the body ever runs,
+    so the crash only fires for an authorized caller. The gate raises an
+    HTTPException, which FastAPI handles cleanly (not an unhandled 500),
+    so a blocked call generates no Sentry event.
 
     Path is OUTSIDE ``settings.api_prefix`` (no /api/) so it doesn't
     accidentally appear in the workspace-API surface that auth /
 
@@ -40,6 +40,11 @@
 from datetime import datetime, timedelta, timezone
 from typing import Any, Optional
 
+from backend.observability import (
+    SAVED_WORKSPACES_RETENTION_MONITOR_CONFIG,
+    SAVED_WORKSPACES_RETENTION_MONITOR_SLUG,
+    sentry_cron_monitor,
+)
 from backend.tiers import Tier, resolve_user_tier, retention_days_for_tier
 from src.config import (
     SUPABASE_SAVED_WORKSPACES_TABLE,
@@ -309,7 +314,16 @@ def main() -> None:
     run) invokes this with `python -m backend.maintenance`; output
     is JSON so structured-log pipelines can ingest it directly.
     """
-    summary = sweep_expired_workspaces()
+    # Wrap the sweep in a Sentry cron check-in (review M22) so a stopped or
+    # failing retention cron pages the operator instead of silently leaving
+    # Free-tier data past its retention promise. No-op when Sentry is
+    # disabled / under pytest; the monitor is upserted from the config so it
+    # never needs touching in the Sentry UI.
+    with sentry_cron_monitor(
+        SAVED_WORKSPACES_RETENTION_MONITOR_SLUG,
+        SAVED_WORKSPACES_RETENTION_MONITOR_CONFIG,
+    ):
+        summary = sweep_expired_workspaces()
     print(json.dumps(summary.to_dict(), indent=2))
 
 
 
@@ -262,6 +262,46 @@ def _init_posthog(settings: BackendSettings) -> None:
 # product-scoped dashboards.
 _PRODUCT_TAG = "jobagent"
 
+# Header the browser sets on anonymous calls (review M21) so a server-side
+# funnel event can be attributed to the SAME PostHog person as the client-side
+# session, instead of every anonymous visitor collapsing onto one shared
+# "anonymous" distinct id. The browser sends `posthog.get_distinct_id()`; on
+# login the client calls `posthog.identify(userId)`, which auto-aliases that
+# anonymous id to the Supabase id — so the whole pre-login → signup path
+# stitches into one person and anonymous→signup conversion is computable.
+#
+# PostHog distinct ids are client-controlled by design, so trusting this header
+# for analytics attribution carries no security weight: a signed-in caller's
+# Supabase id ALWAYS takes precedence (see `resolve_distinct_id`), and the value
+# is never used for auth, ownership, or quota — only as a PostHog person key.
+POSTHOG_DISTINCT_ID_HEADER = "X-PostHog-Distinct-Id"
+
+# Upper bound on the browser-supplied id we'll honor. PostHog's own ids are
+# short uuids; this just stops a pathological header from ballooning an event.
+_MAX_BROWSER_DISTINCT_ID_LEN = 200
+
+
+def resolve_distinct_id(
+    user_id: str | None,
+    browser_distinct_id: str | None = None,
+) -> str:
+    """Pick the best PostHog distinct id for an event (review M21).
+
+    Precedence: the authenticated Supabase id, then the browser's own anonymous
+    distinct id (so anonymous funnel events join the client-side session rather
+    than collapsing onto one shared ``"anonymous"`` person), then the
+    ``"anonymous"`` constant as a last resort for server-to-server callers with
+    neither. The browser value is length-clamped and never trusted for anything
+    but this person key.
+    """
+    resolved = (user_id or "").strip()
+    if resolved:
+        return resolved
+    browser = (browser_distinct_id or "").strip()
+    if browser:
+        return browser[:_MAX_BROWSER_DISTINCT_ID_LEN]
+    return "anonymous"
+
 
 def capture_event(
     distinct_id: str,
@@ -359,6 +399,21 @@ def shutdown_observability() -> None:
     "recovery_threshold": 1,
 }
 
+# The tier-aware retention sweeper (backend/maintenance.py) runs daily and
+# deletes Free workspaces > 7d / Pro > 30d. Unmonitored, a stopped cron would
+# silently leave Free-tier data past its stated retention / GDPR promise with
+# nothing to page the operator (review M22). Daily at 03:00 UTC; reconcile the
+# `value` here with the actual prod crontab if it differs.
+SAVED_WORKSPACES_RETENTION_MONITOR_SLUG = "saved-workspaces-retention"
+SAVED_WORKSPACES_RETENTION_MONITOR_CONFIG: dict[str, Any] = {
+    "schedule": {"type": "crontab", "value": "0 3 * * *"},
+    "timezone": "UTC",
+    "checkin_margin": 60,
+    "max_runtime": 10,
+    "failure_issue_threshold": 1,
+    "recovery_threshold": 1,
+}
+
 
 def _sentry_active() -> bool:
     """True when cron check-ins should actually be sent.
@@ -441,3 +496,99 @@ def sentry_cron_monitor(
                 duration=time.monotonic() - started,
                 monitor_config=monitor_config,
             )
+
+
+# ---------------------------------------------------------------------------
+# Sentry scope-enrichment helpers (review M23)
+#
+# Analysis (POST /workspace/{id}/run) and artifact export raise into Sentry as
+# bare 5xx with no actor, no pipeline stage, and no export descriptor — every
+# issue reads identically and triage starts from zero. These thin wrappers add
+# the actor (set_user), the failing stage (set_tag + breadcrumb), and the
+# export descriptor (set_context) onto the active Sentry scope.
+#
+# Telemetry must never break the request it decorates: each helper is a no-op
+# when Sentry is inactive (or under pytest) and swallows every error. They are
+# intentionally tiny pass-throughs so callers don't import sentry_sdk directly
+# or repeat the hasattr/guard dance — and so the orchestrator's own control
+# flow (praised, left untouched) never has to learn about Sentry.
+# ---------------------------------------------------------------------------
+
+
+def _sentry_sdk_or_none():
+    """Return the ``sentry_sdk`` module when enrichment should apply, else None.
+
+    Mirrors ``_sentry_active``'s guard (pytest off, client active) and folds in
+    the import so every helper below is a one-liner that can't raise on a
+    missing/disabled SDK.
+    """
+    if not _sentry_active():
+        return None
+    try:
+        import sentry_sdk
+    except Exception:  # pragma: no cover — defensive
+        return None
+    return sentry_sdk
+
+
+def set_sentry_user(user_id: str | None) -> None:
+    """Attach the acting user's id to the Sentry scope (just the id, no PII).
+
+    Lets an analysis/export 5xx be filtered to a single account instead of
+    reading as an anonymous platform-wide failure. No-op for anonymous calls
+    (falsy ``user_id``) and when Sentry is inactive.
+    """
+    if not user_id:
+        return
+    sentry_sdk = _sentry_sdk_or_none()
+    if sentry_sdk is None:
+        return
+    with suppress(Exception):
+        if hasattr(sentry_sdk, "set_user"):
+            sentry_sdk.set_user({"id": str(user_id)})
+
+
+def set_sentry_tag(key: str, value: Any) -> None:
+    """Set an indexed Sentry tag (e.g. ``pipeline_stage``) on the active scope."""
+    sentry_sdk = _sentry_sdk_or_none()
+    if sentry_sdk is None:
+        return
+    with suppress(Exception):
+        if hasattr(sentry_sdk, "set_tag"):
+            sentry_sdk.set_tag(key, value)
+
+
+def set_sentry_context(key: str, data: dict[str, Any]) -> None:
+    """Attach a structured context block (e.g. the export descriptor)."""
+    sentry_sdk = _sentry_sdk_or_none()
+    if sentry_sdk is None:
+        return
+    with suppress(Exception):
+        if hasattr(sentry_sdk, "set_context"):
+            sentry_sdk.set_context(key, dict(data))
+
+
+def add_sentry_breadcrumb(
+    *,
+    category: str,
+    message: str,
+    data: dict[str, Any] | None = None,
+    level: str = "info",
+) -> None:
+    """Drop a breadcrumb so a later error carries the trail that led to it.
+
+    Used at orchestrator stage boundaries (``category="agent"``) so an analysis
+    failure shows which stage was entered last — without touching the
+    orchestrator's own control flow.
+    """
+    sentry_sdk = _sentry_sdk_or_none()
+    if sentry_sdk is None:
+        return
+    with suppress(Exception):
+        if hasattr(sentry_sdk, "add_breadcrumb"):
+            sentry_sdk.add_breadcrumb(
+                category=category,
+                message=message,
+                level=level,
+                data=dict(data or {}),
+            )
@@ -15,7 +15,9 @@
     CACHED_JOBS_HEALTHCHECK_MONITOR_SLUG,
     CACHED_JOBS_REFRESH_MONITOR_CONFIG,
     CACHED_JOBS_REFRESH_MONITOR_SLUG,
+    POSTHOG_DISTINCT_ID_HEADER,
     capture_event,
+    resolve_distinct_id,
     sentry_cron_monitor,
 )
 from backend.rate_limit import LIMIT_LLM, limiter
@@ -101,10 +103,15 @@ def search_jobs(
     result = service.search(domain_query) if live else service.search_cached(domain_query)
     # PostHog funnel event — the top of the job-application funnel.
     # Server-side capture, fire-and-forget; carries no PII (counts +
-    # tier only). `quota_user_id` is "" for anonymous callers, which
-    # capture_event maps to the "anonymous" distinct id.
+    # tier only). For anonymous callers `quota_user_id` is "", so we fall
+    # back to the browser's own PostHog distinct id (review M21) — passed in
+    # the X-PostHog-Distinct-Id header — so this top-of-funnel event attaches
+    # to the same person the client identifies on signup, instead of every
+    # anonymous visitor collapsing onto one shared "anonymous" id.
     capture_event(
-        distinct_id=quota_user_id,
+        distinct_id=resolve_distinct_id(
+            quota_user_id, request.headers.get(POSTHOG_DISTINCT_ID_HEADER)
+        ),
         event="job_searched",
         properties={
             "mode": "live" if live else "cached",
 
@@ -1,7 +1,11 @@
 from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
 from fastapi.responses import StreamingResponse
 
-from backend.observability import capture_event
+from backend.observability import (
+    capture_event,
+    set_sentry_context,
+    set_sentry_user,
+)
 from backend.quota import enforce_export_entitlement, enforce_llm_budget
 from backend.rate_limit import LIMIT_HEAVY, LIMIT_LLM, LIMIT_PARSE, limiter
 from backend.request_auth import get_optional_auth_tokens, get_required_auth_tokens
@@ -1106,6 +1110,17 @@ def answer_assistant_question(
     payload: WorkspaceAssistantRequestModel,
     auth_tokens=Depends(get_required_auth_tokens),
 ):
+    """Non-streaming assistant answer — retained as a tested fallback (L1/L7).
+
+    The UI talks only to the SSE sibling below; the dead client wrapper
+    (``askWorkspaceAssistant``) was removed. This route is deliberately KEPT
+    rather than deleted (review L7): it shares the SAME accounted code path
+    (``answer_workspace_question`` -> assistant_service, one monthly
+    assistant-turn counter across both routes) and is pinned by the
+    quota / login-required / error-handling suites, so it cannot silently
+    drift from the stream. If a future change makes it a true parallel
+    contract instead of a thin sync mirror, delete it and its tests then.
+    """
     access_token, refresh_token = auth_tokens
     try:
         return answer_workspace_question(
@@ -1290,6 +1305,23 @@ def export_workspace_artifact_route(
         export_format=payload.export_format,
         themes=(export_theme,),
     )
+    # Resolve the actor once and reuse it for both Sentry enrichment and the
+    # PostHog funnel event below. Enrich the Sentry scope BEFORE the render
+    # (review M23) so an export crash — captured by the request scope — carries
+    # who exported what (format / theme / artifact) instead of a bare 5xx.
+    # Both calls are no-ops when Sentry is inactive.
+    user_id, tier = _event_identity(access_token or "", refresh_token or "")
+    set_sentry_user(user_id)
+    set_sentry_context(
+        "export",
+        {
+            "artifact_kind": payload.artifact_kind,
+            "export_format": payload.export_format,
+            "resume_theme": payload.resume_theme,
+            "cover_letter_theme": payload.cover_letter_theme,
+            "tier": tier,
+        },
+    )
     try:
         result = export_workspace_artifact(
             workspace_snapshot=payload.workspace_snapshot,
@@ -1301,7 +1333,6 @@ def export_workspace_artifact_route(
         # PostHog funnel event — the conversion point: the user took an
         # artifact away. Theme + format properties show which export
         # options actually get used.
-        user_id, tier = _event_identity(access_token or "", refresh_token or "")
         capture_event(
             distinct_id=user_id,
             event="artifact_exported",
 
@@ -4,13 +4,14 @@
 from typing import Any
 
 from backend.config import get_backend_settings
+from backend.tiers import resolve_user_tier
 from src.auth_service import AuthService, AuthSession
 from src.config import (
     AUTH_DEFAULT_ACCOUNT_STATUS,
     assisted_workflow_requires_login,
     get_default_plan_tier_for_email,
 )
-from src.errors import AgentExecutionError, AppError, InputValidationError
+from src.errors import AgentExecutionError, InputValidationError
 from src.openai_service import OpenAIService
 from src.quota_service import QuotaService
 from src.saved_jobs_store import SavedJobsStore
@@ -94,11 +95,16 @@ def _load_daily_quota(
     if not usage_store.is_configured():
         return None
     quota_service = QuotaService(auth_service, usage_store)
+    # Source the tier from resolve_user_tier (subscriptions-backed), NOT
+    # app_user.plan_tier (review M1). app_users.plan_tier is client-writable
+    # via the RLS UPDATE policy, so trusting it here let a self-promoted
+    # plan_tier raise the daily allowance; resolve_user_tier reads the
+    # service-role subscriptions table the rest of the gates already use.
     return quota_service.get_daily_quota_status(
         access_token,
         refresh_token,
         app_user.id,
-        app_user.plan_tier,
+        resolve_user_tier(app_user),
     )
 
 
 
@@ -134,7 +134,13 @@ def _fetch_text(url: str, *, timeout: float, max_bytes: int) -> dict:
         response = requests.get(
             url,
             timeout=timeout,
-            allow_redirects=True,
+            # Do NOT follow redirects (review L2). The caller validates the
+            # URL host up front, but a followed redirect chain isn't re-checked
+            # per hop — a residual SSRF surface if raw.githubusercontent.com
+            # ever 3xx'd off-host. The validated raw URL returns 200 with the
+            # body directly; an unexpected redirect now lands as a non-200 and
+            # is rejected as "http_status" below (fail closed).
+            allow_redirects=False,
             # raw.githubusercontent.com returns text — we don't need
             # any of the github.com auth/session cookies, and we don't
             # send a custom UA so we look like a vanilla client.