From 5953d8712014fd7a66d718f9332474fd303c4e8e Mon Sep 17 00:00:00 2001 From: jwalkorat Date: Mon, 18 May 2026 10:46:30 +0530 Subject: [PATCH 1/2] Fix skill parsing for skill names containing commas --- static/script.js | 7 ++++--- utils/recommender.py | 34 ++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/static/script.js b/static/script.js index aebc9225..2bfc2625 100644 --- a/static/script.js +++ b/static/script.js @@ -350,9 +350,10 @@ if (isIndexPage) { } function syncSkillsHiddenInput() { - // Keep the hidden in sync for form serialisation - // The API expects a comma-separated string, so join the array that way - skillsHidden.value = selectedSkills.join(", "); + // Keep the hidden in sync for form serialisation. + // JSON.stringify preserves skill names that contain commas (e.g. "HTML, CSS") + // so the backend can reconstruct the exact array without mis-splitting. + skillsHidden.value = JSON.stringify(selectedSkills); } updateQuickPickState(); diff --git a/utils/recommender.py b/utils/recommender.py index 308c14f4..d8cdf84b 100644 --- a/utils/recommender.py +++ b/utils/recommender.py @@ -32,18 +32,32 @@ def parse_skills(skills_string): """ - Convert a raw comma-separated skills string into - a normalized lowercase list. + Convert a skills string into a normalized lowercase list. - Example: - "JS, HTML5, CSS3" -> ["javascript", "html", "css"] - """ + Accepts two formats: + 1. JSON array (preferred): '["HTML, CSS", "JavaScript"]' + Handles skill names that contain commas without mis-splitting. + 2. Comma-separated string (legacy fallback): "HTML, CSS, JavaScript" - raw_skills = [ - s.strip().lower() - for s in skills_string.split(",") - if s.strip() - ] + Example: + '["JS", "HTML5", "CSS3"]' -> ["javascript", "html", "css"] + """ + import json + + try: + # Preferred path: frontend sends a JSON-serialized array + parsed = json.loads(skills_string) + if isinstance(parsed, list): + raw_skills = [s.strip().lower() for s in parsed if isinstance(s, str) and s.strip()] + else: + raise ValueError("Parsed JSON is not a list") + except (json.JSONDecodeError, ValueError, TypeError): + # Fallback: handle plain comma-separated strings + raw_skills = [ + s.strip().lower() + for s in skills_string.split(",") + if s.strip() + ] normalized_skills = [ SKILL_ALIASES.get(skill, skill) From 8019ea206c6bb1236b2187a91fdeecaf1e764156 Mon Sep 17 00:00:00 2001 From: jwalkorat Date: Mon, 25 May 2026 22:49:26 +0530 Subject: [PATCH 2/2] fix(skills): add parser tests and clean duplicate serialization logic --- static/script.js | 51 ++++---------------------------------------- tests/test_basic.py | 26 ++++++++++++++++++++++ utils/recommender.py | 2 ++ 3 files changed, 32 insertions(+), 47 deletions(-) diff --git a/static/script.js b/static/script.js index 3e55beab..434bacd5 100644 --- a/static/script.js +++ b/static/script.js @@ -412,12 +412,6 @@ if (clearFiltersBtn) { } function syncSkillsHiddenInput() { - if (!skillsHidden) { - var skillsHidden = document.getElementById("skills"); - } - // Keep the hidden in sync for form serialisation. - // JSON.stringify preserves skill names that contain commas (e.g. "HTML, CSS") - // so the backend can reconstruct the exact array without mis-splitting. if (skillsHidden) { skillsHidden.value = JSON.stringify(selectedSkills); } @@ -513,65 +507,28 @@ if (clearFiltersBtn) { return res.json(); }) .then(function (data) { - setLoadingState(false); if (data.error) { var generalErr = document.getElementById("form-error-general"); - if (generalErr) { generalErr.textContent = data.error; } - return; } renderResults(data.projects || [], data.message); }) - .catch(function () { - + .catch(function (err) { + // this runs if the network request itself fails setLoadingState(false); - //combine form values into an object to send to server/api - var payload = { - // Prefer the hidden input value; fall back to raw text box if hidden input is empty - skills: skillsHidden.value.trim() || skillsTextInput.value.trim(), - level: document.getElementById("level").value, - interest: document.getElementById("interest").value, - time: document.getElementById("time").value - }; - - //post the data to backend api as JSON, then handle the response - fetch("/api/recommend", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(payload) //convert object to json string - }) - .then(function (res) { return res.json(); }) //parse the response as JSON - .then(function (data) { - setLoadingState(false); - var generalErr = document.getElementById("form-error-general"); - if (generalErr) { - generalErr.textContent = - "Something went wrong. Please try again."; + generalErr.textContent = "Something went wrong. Please try again."; } + console.error("API request failed:", err); }); }); - if (data.error) { - var generalErr = document.getElementById("form-error-general"); - if (generalErr) generalErr.textContent = data.error; - return; - } - renderResults(data.projects || [], data.message); - }) - .catch(function (err) { - // this runs if the network request itself fails - setLoadingState(false); - var generalErr = document.getElementById("form-error-general"); - if (generalErr) generalErr.textContent = "Something went wrong. Please try again."; - console.error("API request failed:", err); - }); }); // Manages the loading state of the form and results section(whats visible or not) diff --git a/tests/test_basic.py b/tests/test_basic.py index 982182cd..b4d490d2 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -90,6 +90,32 @@ def test_parse_skills_single_entry(): assert parse_skills("JavaScript") == ["javascript"] +def test_parse_skills_valid_json_array(): + """parse_skills should parse a valid JSON array of skills.""" + result = parse_skills('["Python","React"]') + assert result == ["python", "react"] + + +def test_parse_skills_malformed_json_handling(): + """parse_skills should handle malformed JSON gracefully using fallback.""" + # Should not crash, and parses via fallback comma-splitting behavior + result = parse_skills('["Python",]') + assert isinstance(result, list) + assert len(result) > 0 + + +def test_parse_skills_legacy_fallback(): + """parse_skills should parse a legacy comma-separated string.""" + result = parse_skills("Python,React") + assert result == ["python", "react"] + + +def test_parse_skills_containing_commas(): + """parse_skills should preserve skill names containing commas when using JSON.""" + result = parse_skills('["HTML, CSS","JavaScript"]') + assert result == ["html, css", "javascript"] + + def test_score_single_project_full_match(): """A project that matches all four criteria should receive the maximum score.""" project = { diff --git a/utils/recommender.py b/utils/recommender.py index e7434e3c..39a2e740 100644 --- a/utils/recommender.py +++ b/utils/recommender.py @@ -44,6 +44,8 @@ def parse_skills(skills_string): """ import json + # Skills are serialized as JSON arrays. + # Legacy comma-separated values remain supported for compatibility. try: # Preferred path: frontend sends a JSON-serialized array parsed = json.loads(skills_string)