Skip to content

Commit 7422648

Browse files
feat: implement URL-to-tailored-resume pipeline with experience log as source of truth
- Add robust job listing fetcher with URL canonicalization (Indeed vjk -> viewjob)
- Implement proxy fallback (r.jina.ai) for 403 Forbidden responses
- Add optional Selenium fallback for JavaScript-heavy job boards
- Create build_resume_from_experience_log.py to use experience log as single source of truth
- Normalize resume bullets from strings to dicts for scoring/tailoring
- Integrate tailored resume auto-indexing to data/resumes/index.json
- Add demo script showing end-to-end workflow
- Support HTML/DOCX export with multiple themes

Key improvements:
- Experience log (data/experiences.json) is now the authoritative resume source
- No dependency on outdated master_resume.json
- Tailored resumes automatically indexed and viewable in web UI
- Handles job board anti-bot measures gracefully
- Full end-to-end: URL -> Job Listing -> Resume Build -> Tailor -> Export -> Index

Tested with Indeed.com URL (403 fallback to proxy)
1 parent 0124a86 commit 7422648

4 files changed

Lines changed: 744 additions & 12 deletions

File tree

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Demo: Tailor Resume from Experience Log
4+
5+
This script demonstrates the complete workflow:
6+
1. Load job description from local file
7+
2. Build resume from experience log
8+
3. Tailor resume to job
9+
4. Generate HTML output
10+
11+
This shows the new architecture where experience log is the source of truth.
12+
"""
13+
14+
import sys
15+
import json
16+
from pathlib import Path
17+
18+
# Add src to path
19+
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
20+
21+
from tailor import (
22+
ingest_jd,
23+
extract_keywords,
24+
retrieve_rag_context,
25+
select_and_rewrite,
26+
generate_html_resume,
27+
generate_docx_from_html,
28+
)
29+
from build_resume_from_experience_log import build_resume_from_experience_log
30+
31+
32+
def demo_tailor_from_experience_log():
    """Run the end-to-end tailoring demo, reporting progress on stdout.

    The demo performs, in order: load the job description, extract
    keywords, build the resume from the experience log, retrieve optional
    RAG context, tailor the experience section, write the HTML resume,
    attempt the DOCX export, and print a summary.

    All input and output paths are relative to the current working
    directory.

    Returns:
        bool: ``False`` if the job description file does not exist,
        ``True`` once the summary has been printed.

    Raises:
        Exception: anything raised by the pipeline helpers propagates to
        the caller, except failures of the DOCX export, which are reported
        and skipped.
    """
    banner = "=" * 80

    print("\n" + banner)
    print("DEMO: TAILOR RESUME FROM EXPERIENCE LOG")
    print(banner + "\n")

    # Step 1: Load job description
    print("📋 Step 1: Loading job description...")
    job_file = "data/job_listings/senior_devops_engineer.md"
    if not Path(job_file).exists():
        print(f"❌ Job file not found: {job_file}")
        return False

    # ingest_jd returns a (path, text) pair; only the text is used here.
    _, jd_text = ingest_jd(job_file)
    print(f"✅ Job description loaded ({len(jd_text)} characters)")
    print(f" File: {job_file}\n")

    # Step 2: Extract keywords
    print("🔍 Step 2: Extracting keywords from job description...")
    keywords = extract_keywords(jd_text)
    print(f"✅ Found {len(keywords)} keywords")
    print(f" Top keywords: {', '.join(keywords[:10])}\n")

    # Step 3: Build resume from experience log
    print("📂 Step 3: Building resume from experience log...")
    resume_data = build_resume_from_experience_log("data/experiences.json")
    print("✅ Resume built from experience log")
    print(f" - {len(resume_data['experience'])} experience entries")
    print(f" - {len(resume_data['education'])} education entries")
    print(f" - {len(resume_data['certifications'])} certifications\n")

    # Step 4: Retrieve RAG context
    print("🧠 Step 4: Retrieving RAG context...")
    rag_context = retrieve_rag_context(jd_text, "data/rag/vector_store.json")
    if rag_context and rag_context.get("success"):
        # Tolerate a present-but-null "context" or "documents" value.
        documents = (rag_context.get("context") or {}).get("documents") or []
        print(f"✅ Retrieved {len(documents)} relevant experiences from RAG\n")
    else:
        print("⚠️ RAG retrieval skipped (vector store not available)\n")
        rag_context = None

    # Step 5: Tailor resume
    print("✏️ Step 5: Tailoring resume to job...")
    tailored_experience = select_and_rewrite(
        resume_data["experience"],
        keywords,
        rag_context=rag_context,
    )
    resume_data["experience"] = tailored_experience
    print("✅ Resume tailored to job\n")

    # Step 6: Generate HTML
    print("🎨 Step 6: Generating HTML resume...")
    output_html = "out/demo_tailored_resume.html"
    # Make sure the output directory exists before writing; it is not
    # visible here whether generate_html_resume creates it, and creating
    # it up front is harmless either way.
    Path(output_html).parent.mkdir(parents=True, exist_ok=True)
    generate_html_resume(
        resume_data,
        output_html,
        theme="modern",
    )
    print("✅ HTML resume generated")
    print(f" Output: {output_html}\n")

    # Step 7: Generate DOCX (best effort: a failure must not abort the demo)
    print("📄 Step 7: Generating DOCX resume...")
    output_docx = "out/demo_tailored_resume.docx"
    try:
        generate_docx_from_html(output_html, output_docx)
    except Exception as e:
        docx_generated = False
        print(f"⚠️ DOCX generation skipped: {e}\n")
    else:
        docx_generated = True
        print("✅ DOCX resume generated")
        print(f" Output: {output_docx}\n")

    # Summary
    print(banner)
    print("✅ DEMO COMPLETE")
    print(banner)
    print("\n📊 Summary:")
    print(f" ✓ Job description: {job_file}")
    print(f" ✓ Keywords extracted: {len(keywords)}")
    print(" ✓ Resume built from: data/experiences.json")
    print(f" ✓ Experience entries tailored: {len(tailored_experience)}")
    print(f" ✓ HTML output: {output_html}")
    # Only claim a DOCX file when one was actually produced.
    if docx_generated:
        print(f" ✓ DOCX output: {output_docx}")
    else:
        print(" ✗ DOCX output: skipped")

    print("\n🎯 Architecture:")
    for line in (
        " experiences.json (source of truth)",
        " ↓",
        " build_resume_from_experience_log()",
        " ↓",
        " select_and_rewrite() [tailoring]",
        " ↓",
        " generate_html_resume()",
        " ↓",
        " HTML/DOCX output",
    ):
        print(line)

    print("\n✨ Key Points:")
    for line in (
        " • Experience log is the single source of truth",
        " • No dependency on outdated master_resume.json",
        " • Rich metadata (skills, technologies, techniques, principles)",
        " • RAG-enhanced tailoring for better keyword matching",
        " • Supports multiple output formats (HTML, DOCX)",
    ):
        print(line)
    print("\n")

    return True
135+
136+
137+
if __name__ == "__main__":
    # Translate the demo's boolean outcome into a process exit status.
    # Any unexpected failure is reported with a traceback and exits 1.
    try:
        exit_status = 0 if demo_tailor_from_experience_log() else 1
        sys.exit(exit_status)
    except Exception as exc:
        print(f"\n❌ Error: {exc}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
146+
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Build Resume from Experience Log
4+
5+
This module builds a complete resume from the experience log (data/experiences.json),
6+
which is the source of truth for all resume data.
7+
8+
The experience log contains:
9+
- Experience entries (employer, role, dates, location, bullets, skills, technologies, techniques, principles)
10+
- Education entries (id starts with 'edu-')
11+
- Certification entries (id starts with 'cert-')
12+
13+
This replaces the need for master_resume.json as the primary source.
14+
"""
15+
16+
import json
17+
from pathlib import Path
18+
from typing import Dict, List, Any, Optional
19+
20+
21+
def _dedupe_tags(*tag_groups: Any) -> List[str]:
    """Flatten tag groups, dropping empty tags and case-insensitive repeats."""
    seen = set()
    unique: List[str] = []
    for group in tag_groups:
        for tag in group or []:
            if not tag:
                continue
            key = tag.lower()
            if key not in seen:
                seen.add(key)
                unique.append(tag)
    return unique


def _normalize_bullets(raw_bullets: Any, fallback_tags: List[str]) -> List[Dict[str, Any]]:
    """Convert string or dict bullets into ``{"text": ..., "tags": ...}`` dicts."""
    normalized: List[Dict[str, Any]] = []
    for bullet in raw_bullets or []:
        if isinstance(bullet, dict):
            text = bullet.get("text")
            if text is None:
                # A missing or null text becomes empty, not the string "None".
                text = ""
            elif not isinstance(text, str):
                text = str(text)
            tags = bullet.get("tags")
            if not isinstance(tags, list) or not tags:
                # Fall back to the experience-level tags. Copy them so each
                # bullet owns its list and later edits do not leak.
                tags = list(fallback_tags)
            normalized.append({"text": text, "tags": tags})
        else:
            # Simple string bullet
            normalized.append({"text": str(bullet), "tags": list(fallback_tags)})
    return normalized


def build_resume_from_experience_log(
    experience_log_path: str = "data/experiences.json",
    personal_info: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
    """
    Build a complete resume dictionary from the experience log.

    Entries are classified by their ``id``: ``edu-`` prefixes become
    education items, ``cert-`` prefixes become certification items, and
    everything else (including entries without a string id) is treated as
    work experience. Progress is printed to stdout.

    Args:
        experience_log_path: Path to the experience log JSON file, which is
            iterated as a sequence of entry dicts.
        personal_info: Optional overrides for the built-in personal defaults
            (name, title, location, email, phone, summary). Keys that are
            not supplied keep their default value.

    Returns:
        Resume dictionary with the keys ``name``, ``title``, ``location``,
        ``email``, ``phone``, ``contact``, ``summary``, ``experience``,
        ``education``, ``certifications``, ``technical_proficiencies`` and
        ``areas_of_expertise``.

    Raises:
        FileNotFoundError: If ``experience_log_path`` does not exist.
    """
    # Load experience log
    exp_log_path = Path(experience_log_path)
    if not exp_log_path.exists():
        raise FileNotFoundError(f"Experience log not found: {experience_log_path}")

    with open(exp_log_path, 'r', encoding='utf-8') as f:
        experiences = json.load(f)

    # Default personal info, overridden key-by-key by the caller's values
    info = {
        "name": "Sidney Jones",
        "title": "Senior DevOps Software Engineer",
        "location": "West Bloomfield, MI",
        "email": "sjones@bpmsoftwaresolutions.com",
        "phone": "(248) 802-1847",
        "summary": "Accomplished technology leader with extensive experience in driving software engineering management, enterprise architecture, and large-scale transformation initiatives.",
    }
    if personal_info:
        info.update(personal_info)

    # Initialize resume structure
    resume: Dict[str, Any] = {
        "name": info.get("name", ""),
        "title": info.get("title", ""),
        "location": info.get("location", ""),
        # Keep backward-compatible root fields
        "email": info.get("email", ""),
        "phone": info.get("phone", ""),
        # New structured contact object for HTML generator
        "contact": {
            "email": info.get("email", ""),
            "phone": info.get("phone", ""),
        },
        "summary": info.get("summary", ""),
        "experience": [],
        "education": [],
        "certifications": [],
        "technical_proficiencies": {},
        "areas_of_expertise": [],
    }

    # Separate entries by type
    experience_entries = []
    education_entries = []
    certification_entries = []

    for entry in experiences:
        entry_id = entry.get("id")
        if not isinstance(entry_id, str):
            # A null or non-string id cannot carry a type prefix, so the
            # entry is classified as experience.
            entry_id = ""

        if entry_id.startswith("edu-"):
            education_entries.append(entry)
        elif entry_id.startswith("cert-"):
            certification_entries.append(entry)
        else:
            experience_entries.append(entry)

    # Process experience entries
    print(f"📝 Processing {len(experience_entries)} experience entries...")
    for exp in experience_entries:
        # Combine all tags for this experience, first occurrence wins
        unique_tags = _dedupe_tags(
            exp.get("skills"),
            exp.get("technologies"),
            exp.get("techniques"),
            exp.get("principles"),
        )

        experience_item = {
            "employer": exp.get("employer", ""),
            "role": exp.get("role", ""),
            "dates": exp.get("dates", ""),
            "location": exp.get("location", ""),
            # Bullets are normalized to the dict shape used downstream
            "bullets": _normalize_bullets(exp.get("bullets"), unique_tags),
            # `or []` keeps explicit nulls in the log out of the output
            "skills": exp.get("skills") or [],
            "technologies": exp.get("technologies") or [],
            "techniques": exp.get("techniques") or [],
            "principles": exp.get("principles") or [],
        }

        if unique_tags:
            experience_item["tags"] = list(unique_tags)

        resume["experience"].append(experience_item)

    # Process education entries (stored with the same keys as experience)
    print(f"🎓 Processing {len(education_entries)} education entries...")
    for edu in education_entries:
        resume["education"].append({
            "degree": edu.get("role", ""),
            "institution": edu.get("employer", ""),
            "location": edu.get("location", ""),
            "year": edu.get("dates", ""),
        })

    # Process certification entries
    print(f"🏆 Processing {len(certification_entries)} certification entries...")
    for cert in certification_entries:
        resume["certifications"].append({
            "name": cert.get("role", ""),
            "issuer": cert.get("employer", ""),
            "date": cert.get("dates", ""),
        })

    # Extract technical proficiencies from all skills/technologies
    print("🔧 Extracting technical proficiencies...")
    all_skills = set()
    all_techs = set()
    for exp in experience_entries:
        all_skills.update(exp.get("skills") or [])
        all_techs.update(exp.get("technologies") or [])

    # Join into display strings for the HTML generator; an empty set
    # yields an empty string.
    resume["technical_proficiencies"] = {
        "skills": ", ".join(sorted(all_skills)),
        "technologies": ", ".join(sorted(all_techs)),
    }

    # Extract areas of expertise (from principles or techniques)
    print("💡 Extracting areas of expertise...")
    all_principles = set()
    for exp in experience_entries:
        all_principles.update(exp.get("principles") or [])
        all_principles.update(exp.get("techniques") or [])

    if all_principles:
        resume["areas_of_expertise"] = sorted(all_principles)
    else:
        # Static fallback used when the log has no principles or techniques
        resume["areas_of_expertise"] = [
            "Enterprise Architecture & Cloud Transformation",
            "Revenue Growth & Cost Optimization",
            "SaaS Solution Development",
            "Security & Data Protection",
            "AI Adoption & Scaling",
            "Data-Driven Analytics",
            "Process Automation",
            "Relationship Building",
            "Innovation & Product Development",
            "Business Strategy & Value Delivery",
            "Team Leadership & Agile Coaching",
            "Cybersecurity & Compliance",
        ]

    print("✅ Resume built successfully!")
    print(f" - {len(resume['experience'])} experience entries")
    print(f" - {len(resume['education'])} education entries")
    print(f" - {len(resume['certifications'])} certification entries")
    print(f" - {len(all_skills)} unique skills")
    print(f" - {len(all_techs)} unique technologies")

    return resume
212+
213+
214+
def save_resume_json(resume: Dict[str, Any], output_path: str) -> None:
    """Save resume to a JSON file, creating parent directories as needed.

    The resume is serialized before the file is opened, so a value that
    cannot be encoded as JSON raises without truncating or partially
    overwriting an existing file at ``output_path``.

    Args:
        resume: Resume dictionary to serialize.
        output_path: Destination file path.

    Raises:
        TypeError: If ``resume`` contains a value that is not JSON
            serializable.
    """
    output_file = Path(output_path)
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # Serialize up front: opening in "w" mode truncates immediately, which
    # would destroy a previous good file if encoding failed mid-write.
    serialized = json.dumps(resume, indent=2, ensure_ascii=False)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f"✅ Resume saved to {output_path}")
223+
224+
225+
if __name__ == "__main__":
    # Build the resume from the default experience log location
    resume = build_resume_from_experience_log()

    # Save to file
    output_path = "data/resume_from_experience_log.json"
    save_resume_json(resume, output_path)

    print("\n✅ Resume built from experience log!")
    print(f" Output: {output_path}")
237+

0 commit comments

Comments
 (0)