= {
+ resume: "",
+ jobs: "",
+ jd: "",
+ analysis: !resumeText.trim()
+ ? "Upload a résumé in Step 01 to unlock Analysis."
+ : !manualJobText.trim()
+ ? "Paste a job description in Step 03 to unlock Analysis."
+ : "Both inputs are loaded — Analysis is ready to run.",
+ };
return (
{
- if (ready) setMainTab(step);
+ if (ready) {
+ setMainTab(step);
+ return;
+ }
+ // Locked-step click handling: instead of a
+ // silent no-op (the old behavior with `disabled`),
+ // route the user to the missing prerequisite and
+ // surface a helpful inline notice. Falls back to
+ // a plain notice if no specific prereq is known.
+ const prereq = lockedPrereqStep[step];
+ const message = lockedPrereqMessage[step] || lockReason[step];
+ if (message) {
+ setWorkspaceNotice({ level: "warning", message });
+ }
+ if (prereq) {
+ setMainTab(prereq);
+ }
}}
role="tab"
title={tooltip}
diff --git a/src/config.py b/src/config.py
index 9934d9f..8f9d3ef 100644
--- a/src/config.py
+++ b/src/config.py
@@ -323,7 +323,13 @@ def get_daily_quota_for_plan(plan_tier: str):
normalized_plan = (plan_tier or AUTH_DEFAULT_PLAN_TIER).strip().lower()
if normalized_plan in {"admin", "internal"}:
return {"max_calls": None, "max_total_tokens": None, "plan_tier": normalized_plan}
- if normalized_plan in {"paid", "pro", "plus"}:
+ # Paid tiers share the same daily cost-cap regardless of monthly
+ # feature limits — the monthly TIER_CAPS in backend/tiers.py is
+ # where tier differentiation actually lives. The daily cap here
+ # is a runaway-cost safety net; Pro and Business get the same
+ # generous daily budget so a Business user isn't silently throttled
+ # mid-workflow just because they fell through to the FREE bucket.
+ if normalized_plan in {"paid", "pro", "plus", "business"}:
return {
"max_calls": PAID_TIER_MAX_CALLS_PER_DAY,
"max_total_tokens": PAID_TIER_MAX_TOKENS_PER_DAY,
diff --git a/src/services/jd_llm_parser_service.py b/src/services/jd_llm_parser_service.py
index e77f2b9..557b2e7 100644
--- a/src/services/jd_llm_parser_service.py
+++ b/src/services/jd_llm_parser_service.py
@@ -14,17 +14,76 @@
from __future__ import annotations
-import json
+import re
from typing import Any
from src.openai_service import OpenAIService
+# Section-header strings the LLM sometimes echoes as a list item when
+# the JD's headings sit immediately above a bullet block (e.g. the
+# n8n "AI Product Builder" listing ends Must-Haves with the literal
+# label "REQUIREMENTS / MUST-HAVES"). An item made up only of such
+# labels, joined by separators ("/", "&", ",", "|", ":", "-"), is
+# dropped from must_haves / nice_to_haves.
+_SECTION_LABEL = (
+ r"(?:requirements?|must[\s\-_/]?haves?|nice[\s\-_/]?to[\s\-_/]?haves?|"
+ r"qualifications?|preferred|good\s+signals?|"
+ r"good\s+to\s+have|bonus(?:\s+points?)?|"
+ # Heading phrases like "What you'll do" / "What we're looking for":
+ # straight or smart apostrophe, optional contraction / auxiliary,
+ # an operative verb, and an optional trailing preposition.
+ r"what\s+(?:we|you)(?:['’](?:re|ll|s)|\s+(?:are|will))?"
+ r"(?:\s+(?:looking|look|need|want|do))(?:\s+(?:for|in|at))?)"
+)
+_SECTION_LABEL_ARTIFACT = re.compile(
+ rf"^{_SECTION_LABEL}(?:[\s:.\-/&|,]+{_SECTION_LABEL})*[\s:.\-]*$",
+ re.IGNORECASE,
+)
+
+# Benefits / perks vocabulary the LLM sometimes swept into
+# nice_to_haves when a "Benefits" block sits adjacent to the
+# "Nice to have" block in the JD. These are compensation, not
+# job-requirement signal — they don't belong in either list because
+# matching against them produces nonsense ("candidate has 401k").
+_BENEFIT_KEYWORDS = (
+ "vacation", " pto ", "(pto)", "paid time off", "parental leave",
+ "maternity leave", "paternity leave", "health insurance",
+ "medical insurance", "dental insurance", "vision insurance",
+ "medical, dental", "dental, vision", "health, dental",
+ "health coverage", "medical coverage", " hsa ", "(hsa)",
+ "health savings", "401(k)", "401k", " 401 k ", "retirement plan",
+ # Bare "rsu" would also match "pursue" / "versus", hence the padding.
+ "stock options", "equity grant", " rsu ", " rsus ", "(rsu)", " esop ",
+ "wellness stipend", "wellness benefit", "gym membership", "free lunch",
+ "snacks", "remote stipend", "home office stipend", "commuter benefit",
+ "transit benefit", "life insurance", "disability insurance",
+)
+
+
+def _is_section_label_artifact(text: str) -> bool:
+ return bool(_SECTION_LABEL_ARTIFACT.match(text.strip()))
+
+
+def _looks_like_benefit(text: str) -> bool:
+ # Pad with spaces so space-delimited abbreviation keywords such as
+ # ' pto ' / ' hsa ' only match as whole words, not inside longer
+ # words (e.g. 'pto' in 'laptop' or 'cryptography').
+ haystack = " " + text.strip().lower() + " "
+ return any(keyword in haystack for keyword in _BENEFIT_KEYWORDS)
+
+
def _coerce_string(value: Any) -> str:
return str(value or "").strip()
-def _coerce_string_list(value: Any, *, limit: int = 24) -> list[str]:
+def _coerce_string_list(
+ value: Any,
+ *,
+ limit: int = 24,
+ drop_section_labels: bool = False,
+ drop_benefits: bool = False,
+) -> list[str]:
if not isinstance(value, list):
return []
cleaned: list[str] = []
@@ -34,6 +93,16 @@ def _coerce_string_list(value: Any, *, limit: int = 24) -> list[str]:
normalized = text.lower()
if not text or normalized in seen:
continue
+ if drop_section_labels and _is_section_label_artifact(text):
+ # LLM echoed a section header (e.g. "REQUIREMENTS",
+ # "MUST-HAVES") as a list item — silently drop instead of
+ # surfacing as a requirement.
+ continue
+ if drop_benefits and _looks_like_benefit(text):
+ # Compensation / perks crept into a requirements list.
+ # Drop instead of matching against the candidate's
+ # qualifications, which would produce nonsense signal.
+ continue
cleaned.append(text)
seen.add(normalized)
if len(cleaned) >= limit:
@@ -61,8 +130,16 @@ def _build_jd_llm_parser_prompt(jd_text: str) -> dict[str, Any]:
"(e.g. 'communication', 'leadership', 'collaboration')",
"must_haves": "array of strings — required-experience phrases the JD marks as "
"mandatory (e.g. '5+ years building production backend services', "
- "'BSc in Computer Science'). Each entry should be a distinct line.",
- "nice_to_haves": "array of strings — preferred / bonus / nice-to-have qualifications",
+ "'BSc in Computer Science'). Each entry should be a distinct line. "
+ "Do NOT echo section headers like 'REQUIREMENTS', 'MUST-HAVES', "
+ "'QUALIFICATIONS' as list items — those are headings, not requirements.",
+ "nice_to_haves": "array of strings — preferred / bonus / nice-to-have QUALIFICATIONS "
+ "(extra skills, prior experience, certifications). Do NOT include "
+ "benefits, perks, or compensation (vacation, PTO, parental leave, "
+ "health / medical / dental / vision insurance, HSA, 401(k), stock "
+ "options, RSU, wellness stipend, gym, remote stipend) — those are "
+ "what the company offers the candidate, not what the candidate "
+ "needs to bring.",
}
contract_lines = "\n".join(
'- "{key}": {description}'.format(key=key, description=description)
@@ -152,6 +229,20 @@ def parse(
"experience_requirement": _coerce_string(payload.get("experience_requirement")),
"hard_skills": _coerce_string_list(payload.get("hard_skills"), limit=40),
"soft_skills": _coerce_string_list(payload.get("soft_skills"), limit=20),
- "must_haves": _coerce_string_list(payload.get("must_haves"), limit=10),
- "nice_to_haves": _coerce_string_list(payload.get("nice_to_haves"), limit=10),
+ # must_haves / nice_to_haves get the extra scrub passes:
+ # strip section-header artifacts the LLM occasionally echoes
+ # as list items, and drop benefit / perk vocabulary that
+ # shouldn't be matched against the candidate's skills.
+ "must_haves": _coerce_string_list(
+ payload.get("must_haves"),
+ limit=10,
+ drop_section_labels=True,
+ drop_benefits=True,
+ ),
+ "nice_to_haves": _coerce_string_list(
+ payload.get("nice_to_haves"),
+ limit=10,
+ drop_section_labels=True,
+ drop_benefits=True,
+ ),
}
diff --git a/tests/test_jd_llm_parser_service.py b/tests/test_jd_llm_parser_service.py
index 695c123..6c265fd 100644
--- a/tests/test_jd_llm_parser_service.py
+++ b/tests/test_jd_llm_parser_service.py
@@ -1,4 +1,42 @@
-from src.services.jd_llm_parser_service import JobDescriptionLLMParserService
+"""Tests for `src/services/jd_llm_parser_service.py`.
+
+Two layers of behavior are pinned here:
+
+ * The LLM call's `run_json_prompt` kwargs — output budget + retry
+ safety net. A tight cap with retry disabled used to truncate detailed
+ JDs and silently degrade `build_job_description_from_text_auto`
+ into the deterministic fallback, which then cascaded through fit
+ analysis, tailoring, and the cover letter.
+
+ * The deterministic scrub passes between the LLM payload and the
+ JobDescription handed back to the caller:
+ - `_is_section_label_artifact` drops "REQUIREMENTS" /
+ "MUST-HAVES" / "QUALIFICATIONS" / etc. that the model
+ occasionally echoes as list items because the JD's headings
+ were inlined adjacent to a bullet block (the n8n "AI Product
+ Builder" listing tripped this).
+ - `_looks_like_benefit` drops compensation / perks vocabulary
+ (vacation, PTO, parental leave, medical/dental/vision, HSA,
+ 401(k), stock options, wellness stipend) that some LLM passes
+ swept into `nice_to_haves` when a "Benefits" block sat right
+ above the "Nice to have" block. Benefits are what the company
+ offers, not what the candidate brings — matching against
+ them produces nonsense signal.
+ - `_coerce_string_list(drop_section_labels=, drop_benefits=)` —
+ the public knob the parser uses on must_haves / nice_to_haves.
+
+The full-fidelity LLM call itself isn't exercised here — that's
+covered by `tests/quality/jd_parser_quality_runner.py` against
+fixtures.
+"""
+from __future__ import annotations
+
+from src.services.jd_llm_parser_service import (
+ JobDescriptionLLMParserService,
+ _coerce_string_list,
+ _is_section_label_artifact,
+ _looks_like_benefit,
+)
class _RecordingOpenAIService:
@@ -42,3 +80,104 @@ def test_jd_parser_requests_generous_budget_and_enables_retry():
assert recorder.kwargs is not None
assert recorder.kwargs["max_completion_tokens"] >= 4000
assert recorder.kwargs["allow_output_budget_retry"] is True
+
+
+def test_section_label_artifact_matches_common_headers():
+ samples = [
+ "REQUIREMENTS",
+ "Must-Haves",
+ "MUST HAVES",
+ "must_haves",
+ "Nice to have",
+ "Nice-to-haves",
+ "Qualifications:",
+ "PREFERRED",
+ "Good signals",
+ "Good to have",
+ "Bonus",
+ "Bonus points",
+ "What you'll do",
+ "What we're looking for",
+ ]
+ for sample in samples:
+ assert _is_section_label_artifact(sample), sample
+
+
+def test_section_label_artifact_leaves_real_requirements_alone():
+ samples = [
+ "5+ years building production backend services",
+ "BSc in Computer Science",
+ "Strong English communication",
+ "Experience with PostgreSQL at scale",
+ # 'requirement' (singular noun, not a heading) embedded in a
+ # sentence shouldn't false-positive.
+ "Experience meeting product requirement docs",
+ ]
+ for sample in samples:
+ assert not _is_section_label_artifact(sample), sample
+
+
+def test_looks_like_benefit_matches_compensation_vocab():
+ samples = [
+ "Unlimited vacation",
+ "Generous PTO",
+ "Paid time off",
+ "Parental leave",
+ "Health, dental and vision insurance",
+ "Medical, dental, vision coverage",
+ "HSA contribution",
+ "401(k) match",
+ "401k retirement plan",
+ "Stock options and RSU grants",
+ "Monthly wellness stipend",
+ "Home office stipend",
+ "Commuter benefit",
+ ]
+ for sample in samples:
+ assert _looks_like_benefit(sample), sample
+
+
+def test_looks_like_benefit_leaves_real_requirements_alone():
+ samples = [
+ "Experience designing scalable APIs",
+ "Comfort with Python and Django",
+ "Track record shipping production ML systems",
+ "Strong analytical and product-thinking skills",
+ # "stock" appears (not stock options) — shouldn't match.
+ "Familiarity with stock-management systems",
+ ]
+ for sample in samples:
+ assert not _looks_like_benefit(sample), sample
+
+
+def test_coerce_string_list_drops_artifacts_and_benefits_when_enabled():
+ raw = [
+ "5+ years building production backend services",
+ "REQUIREMENTS",
+ "MUST-HAVES",
+ "BSc in Computer Science",
+ "Unlimited PTO",
+ "401(k) match",
+ "Strong English communication",
+ "",
+ "5+ years building production backend services", # duplicate
+ ]
+ cleaned = _coerce_string_list(
+ raw, limit=20, drop_section_labels=True, drop_benefits=True
+ )
+ assert cleaned == [
+ "5+ years building production backend services",
+ "BSc in Computer Science",
+ "Strong English communication",
+ ]
+
+
+def test_coerce_string_list_default_behavior_unchanged():
+ # Without the new flags the function should behave exactly as
+ # before: dedupe + strip + drop empties, but pass headers /
+ # benefits through. The hard_skills / soft_skills call sites rely
+ # on this — a tool name like "401k Plan SDK" (yes, hypothetical)
+ # shouldn't get killed by the benefits filter on the skills list.
+ raw = ["Python", "REQUIREMENTS", "Unlimited PTO", "", "python"]
+ cleaned = _coerce_string_list(raw)
+ assert cleaned == ["Python", "REQUIREMENTS", "Unlimited PTO"]
diff --git a/tests/test_job_search_service.py b/tests/test_job_search_service.py
index ffbbe9b..96a8e4d 100644
--- a/tests/test_job_search_service.py
+++ b/tests/test_job_search_service.py
@@ -285,6 +285,16 @@ def search(
posted_within_days,
limit,
offset,
+ # JobSearchService now also threads work_modes /
+ # employment_types / sort_by through to the store after the
+ # filter-passthrough fix. The fake doesn't filter on them — the
+ # offset pagination tests don't care about facet behavior — but
+ # the signature must accept them or the service call would
+ # error with "unexpected keyword argument" the moment the fix
+ # lands.
+ work_modes=None,
+ employment_types=None,
+ sort_by="relevance",
):
self.calls.append({"limit": limit, "offset": offset})
return self._rows[offset : offset + limit]
@@ -330,3 +340,49 @@ def test_cached_search_full_final_page_still_reports_has_more():
assert len(result.results) == 10
assert result.has_more is True
+
+
+def test_search_cached_threads_work_modes_and_sort_by_to_store():
+ """Regression: `JobSearchService.search_cached` used to rebuild
+ `normalized_query` without copying the new filter / sort fields,
+ so the UI's Work-mode dropdown and Sort selector silently no-op'd
+ against the cache. This test pins the contract that those values
+ reach `store.search()` exactly as supplied.
+ """
+
+ class _CapturingStore:
+ def __init__(self):
+ self.last_kwargs = None
+
+ def is_configured(self):
+ return True
+
+ def search(self, **kwargs):
+ self.last_kwargs = kwargs
+ return []
+
+ captured = _CapturingStore()
+ service = JobSearchService(sources=[], cache_store=captured)
+ query = JobSearchQuery(
+ query="data engineer",
+ location="London",
+ page_size=15,
+ work_modes=["remote", "hybrid"],
+ employment_types=["fulltime"],
+ sort_by="newest",
+ )
+
+ result = service.search_cached(query)
+
+ assert result.source_status["cache"] == "ok"
+ assert captured.last_kwargs is not None
+ assert captured.last_kwargs["work_modes"] == ["remote", "hybrid"]
+ assert captured.last_kwargs["employment_types"] == ["fulltime"]
+ assert captured.last_kwargs["sort_by"] == "newest"
+ # Empty lists should collapse to None so the RPC's `IS NULL`
+ # short-circuit fires instead of passing an empty array filter.
+ empty_query = JobSearchQuery(query="data engineer", page_size=10)
+ service.search_cached(empty_query)
+ assert captured.last_kwargs["work_modes"] is None
+ assert captured.last_kwargs["employment_types"] is None
+ assert captured.last_kwargs["sort_by"] == "relevance"
diff --git a/tests/test_quota_service.py b/tests/test_quota_service.py
index d6bf535..6dd3d7f 100644
--- a/tests/test_quota_service.py
+++ b/tests/test_quota_service.py
@@ -1,4 +1,9 @@
from src.auth_service import AuthService
+from src.config import (
+ FREE_TIER_MAX_CALLS_PER_DAY,
+ PAID_TIER_MAX_CALLS_PER_DAY,
+ get_daily_quota_for_plan,
+)
from src.quota_service import QuotaService
from src.schemas import DailyQuotaStatus
@@ -76,4 +81,46 @@ def test_quota_service_marks_quota_exhausted_when_daily_limit_is_reached():
assert status.quota_exhausted is True
assert status.remaining_calls == 0
- assert status.remaining_total_tokens == 0
\ No newline at end of file
+ assert status.remaining_total_tokens == 0
+
+
+def test_business_tier_gets_paid_daily_caps_not_free():
+ """Regression: `get_daily_quota_for_plan` used to lack a "business"
+ branch — Business users fell through to the FREE caps (12 calls /
+ 60k tokens per day), silently throttling paying customers on the
+ daily cost-limiter even though the monthly TIER_CAPS table grants
+ them generous feature quotas. They should share the Pro daily cap.
+ """
+ business = get_daily_quota_for_plan("business")
+ pro = get_daily_quota_for_plan("pro")
+ free = get_daily_quota_for_plan("free")
+
+ assert business["max_calls"] == PAID_TIER_MAX_CALLS_PER_DAY
+ assert business["max_calls"] == pro["max_calls"]
+ assert business["max_calls"] != free["max_calls"]
+ assert business["max_total_tokens"] == pro["max_total_tokens"]
+ # plan_tier is echoed lowercase so the downstream label is honest
+ # about which tier the user is on.
+ assert business["plan_tier"] == "business"
+
+
+def test_internal_tier_remains_unlimited():
+ """Internal / admin emails get no daily cap (None == unlimited).
+ The plan_tier label flows through so the UI can render
+ "Unlimited (Internal)" when the dev account is signed in."""
+ internal = get_daily_quota_for_plan("internal")
+ admin = get_daily_quota_for_plan("admin")
+
+ assert internal["max_calls"] is None
+ assert internal["max_total_tokens"] is None
+ assert internal["plan_tier"] == "internal"
+ assert admin["max_calls"] is None
+ assert admin["plan_tier"] == "admin"
+
+
+def test_unknown_tier_falls_back_to_free():
+ """Defensive fallback: an unrecognised plan_tier (typo, future
+ tier we haven't shipped yet) should resolve to the FREE caps
+ rather than crashing or returning unlimited."""
+ unknown = get_daily_quota_for_plan("enterprise_xl")
+ assert unknown["max_calls"] == FREE_TIER_MAX_CALLS_PER_DAY
\ No newline at end of file