Skip to content

Commit a868b24

Browse files
committed
Merge PR: Launch-readiness remediation (10 fixes from audit swarm)
Lands the 10 launch-blocker + High fixes from the 73-agent audit swarm's review-report.md, one logical commit per finding for the audit trail.

Critical (launch-blockers):
- SECURITY-1: BOLA on analyze-jobs routes — owner_user_id binding + auth dependency on status/cancel, 404 (not 403) on mismatch.
- CRITICAL-2: async /analyze-jobs path now enforces quota synchronously before spawning the worker AND carries the structured quota error envelope through the polling hook, so a capped Free user hits the global 429 + upgrade CTA instead of the generic "workflow failed" toast.

High:
- FLOW-3: theme entitlement scoped to the exported artifact (Free résumé export no longer blocked by an unrelated cover-letter theme).
- FE-SEC-1: security response headers (X-Frame-Options DENY, CSP Report-Only, HSTS, X-Content-Type-Options, Referrer-Policy).
- BACKEND-2: per-user in-flight cap (1 run/user) before the global semaphore, closing the concurrent-run weekly-token bypass and the unrelated-user 503 fairness gap.
- LLM-1 + OBS-1: web_search routed through OpenAIService with full metering + cost-tracing; _record_cost_trace ContextVar fallback closes the JD/résumé parser + embedding cost-trace gap.
- OBS-2: jd_parsed + resume_built PostHog funnel events plug the funnel hole between job_searched and analysis_started.
- PERF-1 + PERF-2: assistant streaming state moved out of WorkspaceShell; buildJobReview memoized; b-canvas children React.memo'd. No more whole-tree reconciliation per stream token or JD keystroke.
- A11Y-1 + A11Y-2: shared useAccessibleDialog primitive (focus trap, initial focus, Escape, focus restore) applied to ⌘K palette + FAB popover; palette also gets combobox/listbox semantics.
- TEST-1: Vitest + React Testing Library baseline, 5 coverage cases (humanizeApiError, auth-session, useWorkspaceQuota, tier-gate render, JDReview submit wiring). CI frontend job now runs lint + build + test.
Explicitly skipped (deferred):
- H1 (upgrade CTA URL 404) — payment isn't live yet; revisit when Lemon Squeezy ships.
- PERFDB-1/3/4 — 1000-row time-bombs (saved-jobs cleanup, retention sweeper, cached_jobs DDL not in migration); acceptable pre-traction.
- All Medium + Low — separate cleanup PR.

Test gates passed: 502+ pytest, ruff clean on touched files, tsc + eslint clean on frontend, Vitest baseline green.
2 parents d93ddc4 + d376aac commit a868b24

35 files changed

Lines changed: 4060 additions & 155 deletions

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,10 @@ jobs:
7171
working-directory: frontend
7272
run: npm run lint
7373

74+
- name: Test
75+
working-directory: frontend
76+
run: npm test
77+
7478
- name: Build
7579
working-directory: frontend
7680
run: npm run build

backend/routers/workspace.py

Lines changed: 127 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,46 @@ def _event_identity(access_token: str, refresh_token: str) -> tuple[str, str]:
148148
return (str(getattr(app_user, "id", "") or ""), resolve_user_tier(app_user))
149149

150150

151+
def _require_user_id(access_token: str, refresh_token: str) -> str:
    """Resolve the authenticated caller's ``user_id`` or raise 401.

    Used by the analysis-job status / cancel routes to enforce
    object-level ownership. The ``job_id`` in the URL is a uuid4 that
    leaks via access logs / Referer / telemetry and is NOT an
    authorization token on its own (this was an unauthenticated BOLA —
    SECURITY-1). Resolving STRICTLY (a 401 on any failure, never an
    empty id) guarantees the downstream ownership comparison can never
    be skipped fail-open — a best-effort empty id would match a job with
    no owner and re-open the hole.

    Args:
        access_token: The caller's access token, forwarded unchanged to
            ``resolve_authenticated_context``.
        refresh_token: The caller's refresh token, forwarded unchanged to
            ``resolve_authenticated_context``.

    Returns:
        The non-empty string form of ``auth_context.app_user.id``.

    Raises:
        HTTPException: 401 when the credentials are rejected
            (``InputValidationError``), when no auth context is resolved,
            or when the resolved context carries no user id. Any other
            ``AppError`` is delegated to ``_raise_http_error``.
    """
    # Bind up front so the ``is None`` check below stays well-defined even
    # if ``_raise_http_error`` were ever to return instead of raising; that
    # path then fails CLOSED with a 401 rather than an UnboundLocalError
    # surfacing as a 500.
    auth_context = None
    try:
        auth_context = resolve_authenticated_context(
            access_token=access_token,
            refresh_token=refresh_token,
        )
    except InputValidationError as error:
        # Explicit chaining keeps the credential failure attached as the
        # cause of the 401 for anyone inspecting the traceback.
        raise HTTPException(
            status_code=401,
            detail="Your session has expired. Sign in again to continue.",
        ) from error
    except AppError as error:
        # Non-credential AppError (backend integration, etc.) → canonical
        # status code instead of a generic 500.
        _raise_http_error(error)
    if auth_context is None:
        raise HTTPException(
            status_code=401,
            detail="Couldn't resolve your session. Sign in again to continue.",
        )
    # Tolerate a missing ``app_user`` or a falsy ``id``: both collapse to ""
    # and are rejected below, so an empty id is never returned.
    user_id = str(getattr(getattr(auth_context, "app_user", None), "id", "") or "")
    if not user_id:
        raise HTTPException(
            status_code=401,
            detail="No user identity available for this session.",
        )
    return user_id
189+
190+
151191
def _file_extension(filename: str | None) -> str:
152192
"""Lower-cased file extension without the dot, or "" when absent."""
153193
name = str(filename or "")
@@ -390,7 +430,7 @@ def upload_job_description(
390430
# the parse so an over-budget request has no side effects.
391431
_enforce_request_llm_budget(access_token or "", refresh_token or "")
392432
try:
393-
return parse_job_description_upload(
433+
result = parse_job_description_upload(
394434
filename=payload.filename,
395435
mime_type=payload.mime_type,
396436
content_base64=payload.content_base64,
@@ -399,6 +439,27 @@ def upload_job_description(
399439
access_token=access_token or "",
400440
refresh_token=refresh_token or "",
401441
)
442+
# PostHog funnel event (review OBS-2): JD parse sits between
443+
# job_searched and analysis_started — the most failure-prone
444+
# step — and previously emitted nothing, so the funnel couldn't
445+
# measure drop-off there. The paste path (unified through this
446+
# route in d93ddc4) sends a synthetic "pasted.txt"; everything
447+
# else is a real upload.
448+
user_id, tier = _event_identity(access_token or "", refresh_token or "")
449+
capture_event(
450+
distinct_id=user_id,
451+
event="jd_parsed",
452+
properties={
453+
"source": (
454+
"paste"
455+
if str(payload.filename or "").lower() == "pasted.txt"
456+
else "upload"
457+
),
458+
"tier": tier,
459+
"file_type": _file_extension(payload.filename),
460+
},
461+
)
462+
return result
402463
except AppError as error:
403464
_raise_http_error(error)
404465

@@ -518,12 +579,23 @@ def generate_resume_builder_route(
518579
refresh_token=refresh_token or "",
519580
session_id=payload.session_id,
520581
)
521-
return _attach_persistence_status(
582+
result = _attach_persistence_status(
522583
response,
523584
persist_result,
524585
access_token=access_token or "",
525586
refresh_token=refresh_token or "",
526587
)
588+
# PostHog funnel event (review OBS-2): the agentic résumé-build
589+
# path emitted nothing, so only résumé *upload* was measurable,
590+
# never a build. `step` distinguishes the structuring pass
591+
# (generate) from the finalize (commit).
592+
user_id, tier = _event_identity(access_token or "", refresh_token or "")
593+
capture_event(
594+
distinct_id=user_id,
595+
event="resume_built",
596+
properties={"tier": tier, "step": "generate"},
597+
)
598+
return result
527599
except ValueError as error:
528600
raise HTTPException(status_code=400, detail=str(error))
529601

@@ -624,6 +696,14 @@ def commit_resume_builder_route(
624696
access_token=access_token or "",
625697
refresh_token=refresh_token or "",
626698
)
699+
# PostHog funnel event (review OBS-2) — the finalize step of the
700+
# agentic résumé build (see the generate route for the rationale).
701+
user_id, tier = _event_identity(access_token or "", refresh_token or "")
702+
capture_event(
703+
distinct_id=user_id,
704+
event="resume_built",
705+
properties={"tier": tier, "step": "commit"},
706+
)
627707
return response
628708
except ValueError as error:
629709
raise HTTPException(status_code=400, detail=str(error))
@@ -754,6 +834,12 @@ def start_workspace_analysis_job_route(
754834
auth_tokens=Depends(get_required_auth_tokens),
755835
):
756836
access_token, refresh_token = auth_tokens
837+
# Resolve the caller once: the user_id binds OWNERSHIP onto the job
838+
# (so only this user can later poll / cancel it — SECURITY-1) and
839+
# also identifies the funnel event. Best-effort here (never raises);
840+
# an unresolvable caller binds owner=None, which fails CLOSED on the
841+
# status/cancel routes (no one can read a job with no owner).
842+
user_id, tier = _event_identity(access_token or "", refresh_token or "")
757843
try:
758844
result = start_workspace_analysis_job(
759845
resume_text=payload.resume_text,
@@ -764,10 +850,11 @@ def start_workspace_analysis_job_route(
764850
premium=payload.premium,
765851
access_token=access_token or "",
766852
refresh_token=refresh_token or "",
853+
owner_user_id=user_id or None,
854+
tier=tier,
767855
)
768856
# PostHog funnel event — async variant of the supervised
769857
# pipeline. Emitted once the job is accepted onto the queue.
770-
user_id, tier = _event_identity(access_token or "", refresh_token or "")
771858
capture_event(
772859
distinct_id=user_id,
773860
event="analysis_started",
@@ -951,8 +1038,16 @@ def get_workspace_quota_route(auth_tokens=Depends(get_optional_auth_tokens)):
9511038
"/analyze-jobs/{job_id}",
9521039
response_model=WorkspaceAnalyzeJobStatusResponseModel,
9531040
)
954-
def get_workspace_analysis_job_route(job_id: str):
955-
payload = get_workspace_analysis_job(job_id)
1041+
def get_workspace_analysis_job_route(
1042+
job_id: str,
1043+
auth_tokens=Depends(get_required_auth_tokens),
1044+
):
1045+
# Object-level authorization (SECURITY-1): the job_id is a uuid4 in
1046+
# the URL, not a secret. Require login and only return a job to its
1047+
# owner; a non-owner gets the same 404 as an unknown id.
1048+
access_token, refresh_token = auth_tokens
1049+
user_id = _require_user_id(access_token or "", refresh_token or "")
1050+
payload = get_workspace_analysis_job(job_id, owner_user_id=user_id)
9561051
if payload is None:
9571052
# `_JOBS` is process-local, so a container restart mid-run drops
9581053
# the job state permanently. The frontend polling hook surfaces
@@ -973,15 +1068,25 @@ def get_workspace_analysis_job_route(job_id: str):
9731068
"/analyze-jobs/{job_id}/cancel",
9741069
response_model=WorkspaceAnalyzeJobStatusResponseModel,
9751070
)
976-
def cancel_workspace_analysis_job_route(job_id: str):
1071+
def cancel_workspace_analysis_job_route(
1072+
job_id: str,
1073+
auth_tokens=Depends(get_required_auth_tokens),
1074+
):
9771075
# Cooperative cancel: sets the flag and returns immediately. The
9781076
# job typically comes back still "running" (the worker observes
9791077
# the flag at its next stage boundary); the frontend keeps polling
9801078
# GET /analyze-jobs/{job_id} until it sees the terminal
981-
# "cancelled". Idempotent for already-terminal jobs. Same
982-
# job_id-scoped access model as the status route (the id is an
983-
# unguessable uuid4 hex; no extra auth surface added).
984-
payload = cancel_workspace_analysis_job(job_id)
1079+
# "cancelled". Idempotent for already-terminal jobs.
1080+
#
1081+
# Object-level authorization (SECURITY-1): require login and scope
1082+
# the cancel to the job's owner. The job_id is a uuid4 in the URL
1083+
# that leaks via logs / Referer / telemetry, so it is NOT an
1084+
# authorization token — a non-owner must not be able to stop a
1085+
# stranger's in-flight run. A mismatch returns the same 404 as an
1086+
# unknown id.
1087+
access_token, refresh_token = auth_tokens
1088+
user_id = _require_user_id(access_token or "", refresh_token or "")
1089+
payload = cancel_workspace_analysis_job(job_id, owner_user_id=user_id)
9851090
if payload is None:
9861091
raise HTTPException(
9871092
status_code=404,
@@ -1169,10 +1274,21 @@ def export_workspace_artifact_route(
11691274
# (best-effort; anon/expired -> "free") and block the DOCX /
11701275
# non-classic_ats entitlement with the canonical 429 upgrade
11711276
# nudge. PDF + classic_ats is unchanged for every tier incl. anon.
1277+
# Gate ONLY the theme that actually renders the artifact being
1278+
# exported. The service renders artifacts[artifact_kind] with its OWN
1279+
# theme, so a résumé export is unaffected by the cover-letter theme
1280+
# (and vice-versa). Passing BOTH themes let an unrelated custom
1281+
# cover-letter theme block a Free user's default-theme résumé PDF
1282+
# with a misleading "Pro+ feature" 429 (review HIGH / FLOW-3).
1283+
export_theme = (
1284+
payload.resume_theme
1285+
if payload.artifact_kind == "tailored_resume"
1286+
else payload.cover_letter_theme
1287+
)
11721288
enforce_export_entitlement(
11731289
_resolve_export_tier(access_token or "", refresh_token or ""),
11741290
export_format=payload.export_format,
1175-
themes=(payload.resume_theme, payload.cover_letter_theme),
1291+
themes=(export_theme,),
11761292
)
11771293
try:
11781294
result = export_workspace_artifact(

backend/services/resume_builder_tools.py

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -353,16 +353,16 @@ def _web_search(
353353
"to describe the external context directly."
354354
),
355355
}
356-
client = getattr(openai_service, "_client", None)
357-
if client is None:
358-
return {
359-
"ok": False,
360-
"error": "no_openai_client",
361-
"message": "OpenAI client is not initialized.",
362-
}
363-
356+
# Route the inner call through the OpenAIService accounting layer
357+
# (review LLM-1) instead of reaching into ._client directly. The
358+
# service's run_builtin_web_search records token usage + a cost-trace
359+
# row AND enforces the session budget, so web_search spend is no
360+
# longer invisible to the weekly meter / COGS report. It also applies
361+
# WEB_SEARCH_TIMEOUT_SECONDS to the call (previously the 30s constant
362+
# was dead and the search ran at the 120s client default — LLM-2).
364363
try:
365-
response = client.responses.create(
364+
response = openai_service.run_builtin_web_search(
365+
cleaned_query,
366366
model="gpt-5.4-mini",
367367
instructions=(
368368
"You are a research assistant. Use the web_search tool to "
@@ -371,11 +371,8 @@ def _web_search(
371371
"max. If the search returns nothing useful, say so plainly "
372372
"instead of inventing."
373373
),
374-
input=cleaned_query,
375-
store=False,
376374
max_output_tokens=600,
377-
tools=[{"type": "web_search"}],
378-
tool_choice="auto",
375+
timeout=WEB_SEARCH_TIMEOUT_SECONDS,
379376
)
380377
except Exception as exc:
381378
LOGGER.exception("web_search dispatch failed.")

0 commit comments

Comments
 (0)