-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathuser_context.py
More file actions
200 lines (173 loc) · 6.97 KB
/
user_context.py
File metadata and controls
200 lines (173 loc) · 6.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import json
import os
import pdfplumber
import jieba
import re
class UserManager:
    """Persist per-user state (bookmarks, applications, profile) in a JSON file.

    All job IDs are normalized to stripped strings before being stored or
    looked up.  This matters because JSON serializes every object key as a
    string, so an ``int`` key written before a save/load round-trip would
    otherwise stop matching afterwards.
    """

    def __init__(self, data_path='user_data.json'):
        """Initialize empty state, then overlay any data found at *data_path*."""
        self.data_path = data_path
        self.bookmarks = []      # list of job_id strings, insertion-ordered, unique
        self.applications = {}   # job_id (str) -> {"status": str, "date": str}
        self.profile = {
            "name": "",
            "phone": "",
            "email": "",
            "school": "",
            "major": "",
            "skills": [],        # list of skill keywords
            "resume_text": ""
        }
        self.load_data()

    def load_data(self):
        """Load persisted state; on a missing or unreadable file keep defaults.

        Errors are reported to stdout and swallowed (best-effort load) so a
        corrupt data file never prevents the application from starting.
        """
        if not os.path.exists(self.data_path):
            return
        try:
            with open(self.data_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self.bookmarks = data.get('bookmarks', [])
            # Normalize application keys to str: older versions stored raw
            # (possibly int) job IDs, which JSON coerces to strings anyway.
            self.applications = {
                str(k): v for k, v in (data.get('applications') or {}).items()
            }
            # Merge over the defaults so profile fields introduced after the
            # file was written are still present with their default values.
            self.profile = {**self.profile, **(data.get('profile') or {})}
            # Auto-clean historical duplicate bookmarks on startup.
            normalized = self._normalize_bookmarks(self.bookmarks)
            if normalized != self.bookmarks:
                self.bookmarks = normalized
                self.save_data()
        except Exception as e:
            print(f"Error loading user data: {e}")

    def _normalize_bookmarks(self, bookmarks):
        """Normalize bookmark IDs to stripped strings, de-duplicated in order."""
        normalized = []
        seen = set()
        for raw in bookmarks or []:
            if raw is None:
                continue
            key = str(raw).strip()
            if not key or key in seen:
                continue
            seen.add(key)
            normalized.append(key)
        return normalized

    def save_data(self):
        """Write current state to *data_path* as UTF-8 JSON (best-effort)."""
        try:
            with open(self.data_path, 'w', encoding='utf-8') as f:
                json.dump({
                    "bookmarks": self.bookmarks,
                    "applications": self.applications,
                    "profile": self.profile
                }, f, ensure_ascii=False, indent=4)
        except Exception as e:
            print(f"Error saving user data: {e}")

    def toggle_bookmark(self, job_id):
        """Add *job_id* if not bookmarked, remove it otherwise, and persist.

        Returns True when the job ends up bookmarked, False when it ends up
        removed (or when *job_id* is empty/None, in which case nothing is
        saved).
        """
        key = str(job_id).strip() if job_id is not None else ""
        if not key:
            return False
        self.bookmarks = self._normalize_bookmarks(self.bookmarks)
        if key in self.bookmarks:
            # Remove all occurrences defensively.
            self.bookmarks = [b for b in self.bookmarks if b != key]
            saved = False
        else:
            self.bookmarks.append(key)
            saved = True
        self.save_data()
        return saved

    def remove_bookmark(self, job_id):
        """Remove bookmark explicitly and persist; return True if removed.

        Normalizes the list *before* measuring its length so incidental
        de-duplication cannot be mistaken for a removal of *job_id*.
        """
        key = str(job_id).strip() if job_id is not None else ""
        if not key:
            return False
        cleaned = self._normalize_bookmarks(self.bookmarks)
        remaining = [b for b in cleaned if b != key]
        self.bookmarks = remaining
        if len(remaining) != len(cleaned):
            self.save_data()
            return True
        return False

    def is_bookmarked(self, job_id):
        """Return True if *job_id* (normalized to str) is bookmarked."""
        key = str(job_id).strip() if job_id is not None else ""
        return key in self.bookmarks

    def add_application(self, job_id, status="Applied"):
        """Record an application for *job_id* with a timestamp and persist.

        The key is stored as str so is_applied() keeps matching after the
        JSON round-trip performed by save_data()/load_data().
        """
        self.applications[str(job_id)] = {
            "status": status,
            "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        self.save_data()

    def is_applied(self, job_id):
        """Return True if an application is recorded for *job_id*."""
        return str(job_id) in self.applications

    def parse_resume(self, pdf_path):
        """Extract text from the PDF at *pdf_path* and update the profile.

        Returns a (success: bool, message: str) tuple; on failure the
        message carries the exception text.
        """
        text = ""
        try:
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages:
                    extracted = page.extract_text()
                    if extracted:
                        text += extracted + "\n"
            self.profile['resume_text'] = text
            self._extract_entities(text)
            self.save_data()
            return True, "Resume parsed successfully."
        except Exception as e:
            return False, str(e)

    def _extract_entities(self, text):
        """Populate email/phone/skills/school from *text* via simple heuristics.

        Demo-quality extraction only: regexes for email and mainland-China
        mobile numbers, substring keyword matching for skills, and a naive
        CJK-run grab for the school name.
        """
        # 1. Email
        email_match = re.search(r'[\w\.-]+@[\w\.-]+', text)
        if email_match:
            self.profile['email'] = email_match.group(0)
        # 2. Phone (mainland-China mobile format: 1 + [3-9] + 9 digits)
        phone_match = re.search(r'1[3-9]\d{9}', text)
        if phone_match:
            self.profile['phone'] = phone_match.group(0)
        # 3. Skills (keyword matching from a predefined list).
        # NOTE(review): case-insensitive substring matching, so short tokens
        # such as 'CV', 'PS', 'PR', 'AE' can false-positive inside ordinary
        # words — acceptable for a demo, kept for behavior compatibility.
        common_skills = [
            'Python', 'Java', 'C++', 'SQL', 'HTML', 'CSS', 'JavaScript', 'React', 'Vue',
            'Node.js', 'Django', 'Flask', 'Spring', 'Git', 'Linux', 'Docker', 'Kubernetes',
            'Machine Learning', 'Deep Learning', 'PyTorch', 'TensorFlow', 'NLP', 'CV',
            'Office', 'Excel', 'Word', 'PPT', 'Photoshop', 'PS', 'PR', 'AE'
        ]
        found_skills = set()
        text_lower = text.lower()
        for skill in common_skills:
            if skill.lower() in text_lower:
                found_skills.add(skill)
        self.profile['skills'] = list(found_skills)
        # 4. School: grab the first CJK run ending with "大学" ("university").
        # In a real app, use a university dictionary or an NER model.
        if "大学" in text:
            for line in text.split('\n'):
                if '大学' in line:
                    match = re.search(r'[\u4e00-\u9fa5]+大学', line)
                    if match:
                        self.profile['school'] = match.group(0)
                        break

    def get_recommendation_query(self):
        """Build a query string from the profile for context-aware recommendation.

        Joins skills plus whitespace-split school tokens, de-duplicated in
        insertion order; returns "" when neither skills nor school is set.
        """
        if not self.profile['skills'] and not self.profile['school']:
            return ""
        # Build terms from possibly dirty historical data, keeping order.
        raw_terms = []
        for skill in self.profile.get('skills', []):
            if isinstance(skill, str) and skill.strip():
                raw_terms.append(skill.strip())
        school = self.profile.get('school', '')
        if isinstance(school, str) and school.strip():
            # Split by whitespace so persisted duplicated school tokens collapse.
            raw_terms.extend([t for t in school.split() if t.strip()])
        deduped_terms = []
        seen = set()
        for term in raw_terms:
            if term not in seen:
                seen.add(term)
                deduped_terms.append(term)
        return " ".join(deduped_terms)