|
| 1 | +# ChatGPT Deep Research Integration Guide |
| 2 | + |
| 3 | +This guide enables ChatGPT and other LLMs to effectively use JobSpy for job market research, business development (BD) intelligence, and automated job searches. |
| 4 | + |
| 5 | +## Quick Start for ChatGPT |
| 6 | + |
| 7 | +### Installation Check |
| 8 | +```python |
| 9 | +# First, verify JobSpy is installed |
| 10 | +try: |
| 11 | + from jobspy import scrape_jobs |
| 12 | + print("JobSpy is ready!") |
| 13 | +except ImportError: |
| 14 | + import subprocess |
| 15 | + subprocess.run(["pip", "install", "python-jobspy"]) |
| 16 | + from jobspy import scrape_jobs |
| 17 | +``` |
| 18 | + |
| 19 | +### Basic Search Pattern |
| 20 | +```python |
| 21 | +from jobspy import scrape_jobs |
| 22 | +import json |
| 23 | + |
| 24 | +# Search for jobs |
| 25 | +jobs = scrape_jobs( |
| 26 | + site_name=["indeed", "linkedin"], |
| 27 | + search_term="YOUR_SEARCH_TERM", |
| 28 | + location="LOCATION", |
| 29 | + results_wanted=15, |
| 30 | + verbose=0 |
| 31 | +) |
| 32 | + |
| 33 | +# Convert to JSON for analysis |
| 34 | +result = { |
| 35 | + "count": len(jobs), |
| 36 | + "jobs": jobs.to_dict(orient="records") |
| 37 | +} |
| 38 | +print(json.dumps(result, indent=2, default=str)) |
| 39 | +``` |
| 40 | + |
| 41 | +--- |
| 42 | + |
| 43 | +## Prompt Templates for ChatGPT |
| 44 | + |
| 45 | +### 1. General Job Search |
| 46 | +**User Prompt:** |
| 47 | +> "Find [NUMBER] [JOB_TITLE] jobs in [LOCATION]" |
| 48 | +
|
| 49 | +**ChatGPT Action:** |
| 50 | +```python |
| 51 | +from jobspy import scrape_jobs |
| 52 | +import json |
| 53 | + |
| 54 | +jobs = scrape_jobs( |
| 55 | + site_name=["indeed", "linkedin"], |
| 56 | + search_term="[JOB_TITLE]", |
| 57 | + location="[LOCATION]", |
| 58 | + results_wanted=[NUMBER], |
| 59 | + verbose=0 |
| 60 | +) |
| 61 | + |
| 62 | +# Display results |
| 63 | +for _, job in jobs.head(10).iterrows(): |
| 64 | + print(f"• {job['title']} at {job['company']}") |
| 65 | + print(f" Location: {job['location']}") |
| 66 | + print(f" URL: {job['job_url']}\n") |
| 67 | +``` |
| 68 | + |
| 69 | +### 2. Remote Job Search |
| 70 | +**User Prompt:** |
| 71 | +> "Find remote [JOB_TITLE] jobs posted in the last [HOURS] hours" |
| 72 | +
|
| 73 | +**ChatGPT Action:** |
| 74 | +```python |
| 75 | +jobs = scrape_jobs( |
| 76 | + site_name=["indeed", "linkedin"], |
| 77 | + search_term="[JOB_TITLE]", |
| 78 | + is_remote=True, |
| 79 | + hours_old=[HOURS], |
| 80 | + results_wanted=20, |
| 81 | + verbose=0 |
| 82 | +) |
| 83 | +``` |
| 84 | + |
| 85 | +### 3. Salary Research |
| 86 | +**User Prompt:** |
| 87 | +> "What's the salary range for [JOB_TITLE] in [LOCATION]?" |
| 88 | +
|
| 89 | +**ChatGPT Action:** |
| 90 | +```python |
| 91 | +jobs = scrape_jobs( |
| 92 | + site_name=["indeed", "glassdoor"], |
| 93 | + search_term="[JOB_TITLE]", |
| 94 | + location="[LOCATION]", |
| 95 | + results_wanted=50, |
| 96 | + verbose=0 |
| 97 | +) |
| 98 | + |
| 99 | +# Analyze salary data |
| 100 | +salaries = jobs[jobs['min_amount'].notna()][['title', 'company', 'min_amount', 'max_amount', 'interval']] |
| 101 | +print(f"Salary data from {len(salaries)} postings:") |
| 102 | +print(f"Range: ${salaries['min_amount'].min():,.0f} - ${salaries['max_amount'].max():,.0f}") |
| 103 | +print(f"Average: ${salaries[['min_amount', 'max_amount']].mean().mean():,.0f}") |
| 104 | +``` |
| 105 | + |
| 106 | +### 4. Company-Specific Search |
| 107 | +**User Prompt:** |
| 108 | +> "Find all jobs at [COMPANY_NAME]" |
| 109 | +
|
| 110 | +**ChatGPT Action:** |
| 111 | +```python |
| 112 | +jobs = scrape_jobs( |
| 113 | + site_name=["indeed"], |
| 114 | + search_term='"[COMPANY_NAME]"', # Exact match |
| 115 | + results_wanted=50, |
| 116 | + verbose=0 |
| 117 | +) |
| 118 | + |
| 119 | +company_jobs = jobs[jobs['company'].str.contains('[COMPANY_NAME]', case=False, na=False, regex=False)] |
| 120 | +print(f"Found {len(company_jobs)} jobs at [COMPANY_NAME]") |
| 121 | +``` |
| 122 | + |
| 123 | +### 5. BD Intelligence - Hiring Trends |
| 124 | +**User Prompt:** |
| 125 | +> "Which companies are hiring the most [JOB_TYPE] in [INDUSTRY/LOCATION]?" |
| 126 | +
|
| 127 | +**ChatGPT Action:** |
| 128 | +```python |
| 129 | +jobs = scrape_jobs( |
| 130 | + site_name=["indeed", "linkedin"], |
| 131 | + search_term="[JOB_TYPE]", |
| 132 | + location="[LOCATION]", |
| 133 | + results_wanted=100, |
| 134 | + verbose=0 |
| 135 | +) |
| 136 | + |
| 137 | +# Analyze by company |
| 138 | +company_counts = jobs['company'].value_counts().head(15) |
| 139 | +print("Top Hiring Companies:") |
| 140 | +for company, count in company_counts.items(): |
| 141 | + print(f" {company}: {count} openings") |
| 142 | +``` |
| 143 | + |
| 144 | +### 6. Federal/Cleared Job Search |
| 145 | +**User Prompt:** |
| 146 | +> "Find [CLEARANCE_LEVEL] cleared [JOB_TITLE] positions" |
| 147 | +
|
| 148 | +**ChatGPT Action:** |
| 149 | +```python |
| 150 | +jobs = scrape_jobs( |
| 151 | + site_name=["indeed", "linkedin"], |
| 152 | + search_term=f"[JOB_TITLE] [CLEARANCE_LEVEL]", |
| 153 | + location="Washington, DC", |
| 154 | + results_wanted=30, |
| 155 | + verbose=0 |
| 156 | +) |
| 157 | +``` |
| 158 | + |
| 159 | +### 7. Competitor Analysis |
| 160 | +**User Prompt:** |
| 161 | +> "What positions are [COMPETITOR_COMPANY] hiring for?" |
| 162 | +
|
| 163 | +**ChatGPT Action:** |
| 164 | +```python |
| 165 | +jobs = scrape_jobs( |
| 166 | + site_name=["indeed"], |
| 167 | + search_term='"[COMPETITOR_COMPANY]"', |
| 168 | + results_wanted=50, |
| 169 | + verbose=0 |
| 170 | +) |
| 171 | + |
| 172 | +# Filter and analyze |
| 173 | +competitor_jobs = jobs[jobs['company'].str.contains('[COMPETITOR_COMPANY]', case=False, na=False, regex=False)] |
| 174 | +role_distribution = competitor_jobs['title'].value_counts() |
| 175 | +print(f"Roles at [COMPETITOR_COMPANY]:\n{role_distribution}") |
| 176 | +``` |
| 177 | + |
| 178 | +--- |
| 179 | + |
| 180 | +## CLI Usage for ChatGPT Code Execution |
| 181 | + |
| 182 | +The CLI provides a simpler interface when using code execution: |
| 183 | + |
| 184 | +```bash |
| 185 | +# Basic search |
| 186 | +python jobspy_cli.py --search "data scientist" --location "NYC" --format json |
| 187 | + |
| 188 | +# Remote jobs with filters |
| 189 | +python jobspy_cli.py --search "software engineer" --remote --hours 48 --results 20 |
| 190 | + |
| 191 | +# Multiple sites |
| 192 | +python jobspy_cli.py --search "project manager" --sites indeed,linkedin,glassdoor |
| 193 | + |
| 194 | +# JSON input mode (for complex queries) |
| 195 | +echo '{"search_term": "python developer", "location": "Remote", "results_wanted": 10}' | python jobspy_cli.py --json-input |
| 196 | +``` |
| 197 | + |
| 198 | +--- |
| 199 | + |
| 200 | +## Output Formats |
| 201 | + |
| 202 | +### JSON Format (Recommended for LLM) |
| 203 | +```json |
| 204 | +{ |
| 205 | + "success": true, |
| 206 | + "count": 15, |
| 207 | + "jobs": [ |
| 208 | + { |
| 209 | + "site": "indeed", |
| 210 | + "title": "Software Engineer", |
| 211 | + "company": "TechCorp", |
| 212 | + "location": "San Francisco, CA", |
| 213 | + "job_type": "fulltime", |
| 214 | + "min_amount": 120000, |
| 215 | + "max_amount": 180000, |
| 216 | + "interval": "yearly", |
| 217 | + "job_url": "https://indeed.com/...", |
| 218 | + "description": "Job description here...", |
| 219 | + "date_posted": "2025-01-15", |
| 220 | + "is_remote": false |
| 221 | + } |
| 222 | + ] |
| 223 | +} |
| 224 | +``` |
| 225 | + |
| 226 | +### Key Fields for Analysis |
| 227 | +| Field | Description | Use Case | |
| 228 | +|-------|-------------|----------| |
| 229 | +| `title` | Job title | Role identification | |
| 230 | +| `company` | Company name | BD targeting | |
| 231 | +| `location` | Job location | Geographic analysis | |
| 232 | +| `min_amount`/`max_amount` | Salary range | Compensation research | |
| 233 | +| `interval` | Salary period | Normalize salaries | |
| 234 | +| `job_url` | Direct link | Reference/verification | |
| 235 | +| `description` | Full job description | Skills extraction | |
| 236 | +| `date_posted` | Posting date | Freshness filter | |
| 237 | +| `is_remote` | Remote flag | Work arrangement | |
| 238 | + |
| 239 | +--- |
| 240 | + |
| 241 | +## Best Practices for ChatGPT |
| 242 | + |
| 243 | +### 1. Start with Indeed |
| 244 | +Indeed has the best coverage and no rate limiting. Start searches here: |
| 245 | +```python |
| 246 | +jobs = scrape_jobs(site_name=["indeed"], ...) |
| 247 | +``` |
| 248 | + |
| 249 | +### 2. Use Appropriate Result Counts |
| 250 | +- Quick overview: `results_wanted=10` |
| 251 | +- Standard search: `results_wanted=20` |
| 252 | +- Comprehensive analysis: `results_wanted=50` to `results_wanted=100` |
| 253 | + |
| 254 | +### 3. Filter by Recency |
| 255 | +Use `hours_old` for fresh postings: |
| 256 | +```python |
| 257 | +jobs = scrape_jobs(..., hours_old=24) # Last 24 hours |
| 258 | +jobs = scrape_jobs(..., hours_old=72) # Last 3 days |
| 259 | +jobs = scrape_jobs(..., hours_old=168) # Last week |
| 260 | +``` |
| 261 | + |
| 262 | +### 4. Handle Empty Results |
| 263 | +```python |
| 264 | +jobs = scrape_jobs(...) |
| 265 | +if jobs.empty: |
| 266 | + print("No jobs found. Try broadening your search.") |
| 267 | +else: |
| 268 | + print(f"Found {len(jobs)} jobs") # Process results here |
| 269 | +``` |
| 270 | + |
| 271 | +### 5. Use Boolean Search Operators |
| 272 | +Indeed supports advanced search: |
| 273 | +```python |
| 274 | +# Exact phrase match |
| 275 | +search_term = '"software engineer"' |
| 276 | + |
| 277 | +# Exclude terms |
| 278 | +search_term = 'python developer -junior -entry' |
| 279 | + |
| 280 | +# OR combinations |
| 281 | +search_term = '(python OR java) developer senior' |
| 282 | +``` |
| 283 | + |
| 284 | +--- |
| 285 | + |
| 286 | +## Advanced Use Cases |
| 287 | + |
| 288 | +### BD Intelligence: Growth Signals |
| 289 | +```python |
| 290 | +def identify_growth_companies(industry_keyword, location): |
| 291 | + """Find companies with high hiring activity (growth signals)""" |
| 292 | + jobs = scrape_jobs( |
| 293 | + site_name=["indeed", "linkedin"], |
| 294 | + search_term=industry_keyword, |
| 295 | + location=location, |
| 296 | + results_wanted=100, |
| 297 | + hours_old=168, # Last week |
| 298 | + verbose=0 |
| 299 | + ) |
| 300 | + |
| 301 | + company_analysis = jobs.groupby('company').agg({ |
| 302 | + 'title': 'count', |
| 303 | + 'location': lambda x: list(set(x)) |
| 304 | + }).rename(columns={'title': 'openings'}) |
| 305 | + |
| 306 | + growth_companies = company_analysis[company_analysis['openings'] >= 5] |
| 307 | + return growth_companies.sort_values('openings', ascending=False) |
| 308 | +``` |
| 309 | + |
| 310 | +### CIS Labor Category Mapping |
| 311 | +```python |
| 312 | +def extract_role_requirements(job_title, location): |
| 313 | + """Extract duties and qualifications for labor category mapping""" |
| 314 | + jobs = scrape_jobs( |
| 315 | + site_name=["indeed"], |
| 316 | + search_term=job_title, |
| 317 | + location=location, |
| 318 | + results_wanted=20, |
| 319 | + linkedin_fetch_description=True, # NOTE: presumably only affects LinkedIn results; site_name above is Indeed-only |
| 320 | + verbose=0 |
| 321 | + ) |
| 322 | + |
| 323 | + # Return descriptions for analysis |
| 324 | + return jobs[['title', 'company', 'description']].to_dict(orient='records') |
| 325 | +``` |
| 326 | + |
| 327 | +### Competitor Staffing Monitor |
| 328 | +```python |
| 329 | +COMPETITOR_STAFFING = [ |
| 330 | + "Insight Global", "TEKsystems", "Apex Systems", |
| 331 | + "Belcan", "GDIT", "Booz Allen Hamilton" |
| 332 | +] |
| 333 | + |
| 334 | +def monitor_competitors(): |
| 335 | + """Monitor hiring activity at competitor staffing companies""" |
| 336 | + results = {} |
| 337 | + for company in COMPETITOR_STAFFING: |
| 338 | + jobs = scrape_jobs( |
| 339 | + site_name=["indeed"], |
| 340 | + search_term=f'"{company}"', |
| 341 | + results_wanted=30, |
| 342 | + verbose=0 |
| 343 | + ) |
| 344 | + results[company] = { |
| 345 | + "openings": len(jobs), |
| 346 | + "roles": jobs['title'].tolist()[:10] |
| 347 | + } |
| 348 | + return results |
| 349 | +``` |
| 350 | + |
| 351 | +--- |
| 352 | + |
| 353 | +## Troubleshooting |
| 354 | + |
| 355 | +### Rate Limiting (429 Error) |
| 356 | +- **Solution**: Use proxies or switch to Indeed (no rate limiting) |
| 357 | +```python |
| 358 | +jobs = scrape_jobs(..., proxies=["proxy1:port", "proxy2:port"]) |
| 359 | +``` |
| 360 | + |
| 361 | +### No Results |
| 362 | +- Broaden search term |
| 363 | +- Remove location filter |
| 364 | +- Try different sites |
| 365 | +- Check spelling |
| 366 | + |
| 367 | +### LinkedIn Issues |
| 368 | +- LinkedIn is restrictive; use proxies |
| 369 | +- Set `linkedin_fetch_description=False` for faster results |
| 370 | +- Consider using Indeed as primary source |
| 371 | + |
| 372 | +--- |
| 373 | + |
| 374 | +## Reference |
| 375 | + |
| 376 | +### Supported Sites |
| 377 | +| Site | Coverage | Notes | |
| 378 | +|------|----------|-------| |
| 379 | +| `indeed` | Global | Best choice, no rate limits | |
| 380 | +| `linkedin` | Global | Requires proxies for heavy use | |
| 381 | +| `glassdoor` | Major countries | Includes company reviews | |
| 382 | +| `zip_recruiter` | US/Canada | Good salary data | |
| 383 | +| `google` | Global | Aggregates multiple sources | |
| 384 | +| `bayt` | Middle East | Regional specialist | |
| 385 | +| `naukri` | India | Regional specialist | |
| 386 | + |
| 387 | +### Full Parameter List |
| 388 | +See `tool_manifest.json` for complete API documentation. |
0 commit comments