Skip to content

Commit f4357ed

Browse files
committed
Support parsing jobs posts
1 parent 35050b3 commit f4357ed

3 files changed

Lines changed: 34 additions & 32 deletions

File tree

hacker_news/llm/openai.py

Lines changed: 24 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -79,45 +79,38 @@ def call_openai_family(content: str, sys_prompt: str) -> str:
7979
# Gemma outputs weird words like Kün/viciss/▁purcha/▁xPos/▁Gorb
8080
kwargs['logit_bias'] = {200507: -100, 225856: -100, 6204: -100, 232014: -100, 172406: -100}
8181

82-
if config.openai_model.startswith('text-'):
83-
prompt = (f'Use third person mood to summarize the following article delimited by triple backticks in 2 concise English sentences. Ensure the summary does not exceed 100 characters.\n'
84-
f'```{content.strip(".")}.```')
85-
resp = openai.Completion.create(
86-
prompt=prompt,
87-
**kwargs
88-
)
89-
answer = resp['choices'][0]['text'].strip()
90-
else:
91-
resp = openai.ChatCompletion.create(
92-
messages=[
93-
{
94-
"role": "system",
95-
"content": sys_prompt
96-
},
97-
{'role': 'user', 'content': content},
98-
],
99-
**kwargs)
100-
message = resp["choices"][0]["message"]
101-
if message.get('function_call'):
102-
json_str = message['function_call']['arguments']
103-
if resp["choices"][0]['finish_reason'] == 'length':
104-
json_str += '"}' # best effort to save truncated answers
105-
try:
106-
answer = json.loads(json_str)
107-
except JSONDecodeError as e:
108-
logger.warning(f'Failed to decode answer from openai, will fallback to plain text, error: {e}')
109-
return '' # Let fallback code kicks in
110-
else:
111-
answer = message['content'].strip()
82+
resp = openai.ChatCompletion.create(
83+
messages=[
84+
{
85+
"role": "system",
86+
"content": sys_prompt
87+
},
88+
{'role': 'user', 'content': content},
89+
],
90+
**kwargs)
11291
logger.info(f'content: {content}')
11392
logger.info(f'took {time.time() - start_time}s to generate: '
11493
# Default str(resp) prints \u516c
11594
f'{json.dumps(resp.to_dict_recursive(), sort_keys=True, indent=2, ensure_ascii=False)}')
95+
if 'error' in resp:
96+
raise Exception(f'error message: {resp["error"].get("message")}, code: {resp["error"].get("code")}')
97+
message = resp["choices"][0]["message"]
98+
if message.get('function_call'):
99+
json_str = message['function_call']['arguments']
100+
if resp["choices"][0]['finish_reason'] == 'length':
101+
json_str += '"}' # best effort to save truncated answers
102+
try:
103+
answer = json.loads(json_str)
104+
except JSONDecodeError as e:
105+
logger.warning(f'Failed to decode answer from openai, will fallback to plain text, error: {e}')
106+
return '' # Let fallback code kicks in
107+
else:
108+
answer = message['content'].strip()
116109
# Gemma sometimes returns "**Summary:**\n\nXXX\n\n**Key points:**\n\nXXX", extract the summary part
117110
for line in answer.split('\n'):
118111
if not line.strip():
119112
continue
120-
if 'summary' in line.lower() and len(line) <= 100:
113+
if 'summary' in line.lower() and line.strip()[-1] == ':':
121114
continue
122115
answer = line
123116
break

hacker_news/news.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,9 @@ def get_score(self) -> int:
8080
except:
8181
return 0
8282

83+
def is_hiring_job(self) -> bool:
84+
return self.get_score() == 0 and not self.author and 'YC ' in self.title
85+
8386
def slug(self):
8487
return slugify(self.title or 'no title')
8588

@@ -130,7 +133,8 @@ def summarize_by_openai(self, content):
130133
if not openai.api_key:
131134
logger.info("OpenAI API key is not set")
132135
return ''
133-
if self.get_score() < config.openai_score_threshold: # Avoid expensive openai
136+
if (self.get_score() < config.openai_score_threshold # Avoid expensive openai
137+
and not self.is_hiring_job()):
134138
logger.info("Score %d is too small, ignore openai", self.get_score())
135139
return ''
136140

test/test_hackernews_parser.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime, timedelta
33

44
from hacker_news.algolia_api import get_news
5+
from hacker_news.news import News
56
from hacker_news.parser import HackerNewsParser
67

78

@@ -52,3 +53,7 @@ def test_algolia_api(self):
5253
date = news_list[0].submit_time.date()
5354
for news in news_list:
5455
self.assertEqual(date, news.submit_time.date())
56+
57+
def test_maybe_jobs_post(self):
58+
news = News(title='MixRank (YC S11) Is Hiring Software Engineers and Founders Globally')
59+
self.assertTrue(news.is_hiring_job())

0 commit comments

Comments
 (0)