From e3488f31176336814a8c141c4d09cb36f35eef12 Mon Sep 17 00:00:00 2001
From: Enxhi Tabaku <enxhi.tabaku@gmail.com>
Date: Thu, 19 Mar 2026 01:57:17 +0100
Subject: [PATCH] fix(glassdoor): fix CSRF token fetch URL and non-fatal error
 handling

Two bugs prevented Glassdoor from returning any results:

1. _get_csrf_token() was fetching /Job/computer-science-jobs.htm which
   now returns 404 after Glassdoor's Next.js migration. Changed to fetch
   the homepage (/) which reliably returns the token.

2. _fetch_jobs_page() treated any "errors" key in the GraphQL response
   as fatal, dropping all job results. Glassdoor commonly returns
   non-critical 503s on peripheral fields (e.g. jobsPageSeoData) while
   the actual jobListings data is intact. Now only errors whose path
   includes jobListings (and not jobsPageSeoData) are treated as fatal;
   errors on other paths, or with no path at all, are ignored.

Verified: 30 jobs returned for Spain/engineer with both fixes applied.
---
 jobspy/glassdoor/__init__.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/jobspy/glassdoor/__init__.py b/jobspy/glassdoor/__init__.py
index 8de7915f..cd00abc5 100644
--- a/jobspy/glassdoor/__init__.py
+++ b/jobspy/glassdoor/__init__.py
@@ -121,7 +121,16 @@ def _fetch_jobs_page(
                 raise GlassdoorException(exc_msg)
             res_json = response.json()[0]
             if "errors" in res_json:
-                raise ValueError("Error encountered in API response")
+                # Only treat errors on the jobListings field as fatal.
+                # Glassdoor commonly returns non-critical 503s on peripheral
+                # fields (e.g. jobsPageSeoData) while the job data is intact.
+                job_errors = [
+                    e for e in res_json["errors"]
+                    if "jobListings" in str(e.get("path", []))
+                    and "jobsPageSeoData" not in str(e.get("path", []))
+                ]
+                if job_errors:
+                    raise ValueError(f"Error encountered in jobListings API response: {job_errors}")
         except (
             requests.exceptions.ReadTimeout,
             GlassdoorException,
@@ -151,9 +160,10 @@ def _fetch_jobs_page(
 
     def _get_csrf_token(self):
         """
-        Fetches csrf token needed for API by visiting a generic page
+        Fetches csrf token needed for API by visiting the homepage.
+        Previously used /Job/computer-science-jobs.htm which now returns 404.
         """
-        res = self.session.get(f"{self.base_url}/Job/computer-science-jobs.htm")
+        res = self.session.get(f"{self.base_url}/")
         pattern = r'"token":\s*"([^"]+)"'
         matches = re.findall(pattern, res.text)
         token = None