Skip to content

Commit 32a0eec

Browse files
committed
move shared config + fn to utils
1 parent ae431aa commit 32a0eec

File tree

11 files changed

+28
-112
lines changed

11 files changed

+28
-112
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
55

66
# Custom
7+
.DS_Store
78
chat*.json
89
QueryTechnology.json
910
hn_top.json

scripts/analyze_hn_comments.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,7 @@
2525
from concurrent.futures import ThreadPoolExecutor, as_completed
2626

2727
import requests
28-
from utils import USER_AGENT
29-
30-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
31-
REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))
32-
LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
33-
LLMS_MODEL = os.getenv("LLMS_MODEL", "moonshotai/kimi-k2.5")
28+
from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_ANALYTICS_MODEL, USER_AGENT, parse_json_response
3429

3530
HN_API = "https://hacker-news.firebaseio.com/v0/item/{}.json"
3631
SESSION = requests.Session()
@@ -200,25 +195,6 @@ def comments_to_text(comments: list[dict], max_comments: int = 200, max_chars: i
200195
- Keep the total output under 500 words
201196
- Return ONLY valid JSON"""
202197

203-
204-
def parse_json_response(text: str) -> dict:
205-
"""Parse JSON from an LLM response."""
206-
try:
207-
return json.loads(text)
208-
except json.JSONDecodeError:
209-
pass
210-
cleaned = re.sub(r"^```(?:json)?\s*", "", text.strip())
211-
cleaned = re.sub(r"\s*```$", "", cleaned)
212-
try:
213-
return json.loads(cleaned)
214-
except json.JSONDecodeError:
215-
pass
216-
match = re.search(r"(\{[\s\S]*\})", text)
217-
if match:
218-
return json.loads(match.group(1))
219-
raise ValueError("Could not parse JSON from LLM response")
220-
221-
222198
def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
223199
"""Use LLM to generate sentiment analysis markdown."""
224200
user_message = f"Post Title: {post_title}\n\n--- COMMENTS ---\n{comments_text}"
@@ -260,7 +236,7 @@ def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
260236
def main():
261237
parser = argparse.ArgumentParser(description="Analyze comments from a Hacker News post.")
262238
parser.add_argument("url", help="HN comments URL or item ID (e.g. https://news.ycombinator.com/item?id=46978710)")
263-
parser.add_argument("--model", default=LLMS_MODEL, help=f"Model name (default: {LLMS_MODEL})")
239+
parser.add_argument("--model", default=LLMS_ANALYTICS_MODEL, help=f"Model name (default: {LLMS_ANALYTICS_MODEL})")
264240
parser.add_argument(
265241
"--max-chars", type=int, default=30000, help="Max chars of comments to send to LLM (default: 30000)"
266242
)

scripts/analyze_reddit_comments.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,7 @@
2020
import sys
2121
from urllib.request import urlopen, Request
2222

23-
from utils import USER_AGENT
24-
25-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
26-
REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))
27-
LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
28-
LLMS_MODEL = os.getenv("LLMS_MODEL", "moonshotai/kimi-k2.5")
23+
from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_ANALYTICS_MODEL, USER_AGENT, parse_json_response
2924

3025
def fetch_reddit_comments(comments_url: str) -> tuple[dict, list[dict]]:
3126
"""Fetch post data and comment trees from a Reddit comments URL.
@@ -146,24 +141,6 @@ def comments_to_text(comments: list[dict], max_comments: int = 200, max_chars: i
146141
- Return ONLY valid JSON"""
147142

148143

149-
def parse_json_response(text: str) -> dict:
150-
"""Parse JSON from an LLM response."""
151-
try:
152-
return json.loads(text)
153-
except json.JSONDecodeError:
154-
pass
155-
cleaned = re.sub(r"^```(?:json)?\s*", "", text.strip())
156-
cleaned = re.sub(r"\s*```$", "", cleaned)
157-
try:
158-
return json.loads(cleaned)
159-
except json.JSONDecodeError:
160-
pass
161-
match = re.search(r"(\{[\s\S]*\})", text)
162-
if match:
163-
return json.loads(match.group(1))
164-
raise ValueError("Could not parse JSON from LLM response")
165-
166-
167144
def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
168145
"""Use LLM to generate sentiment analysis markdown."""
169146
user_message = f"Post Title: {post_title}\n\n--- COMMENTS ---\n{comments_text}"
@@ -205,7 +182,7 @@ def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
205182
def main():
206183
parser = argparse.ArgumentParser(description="Analyze comments from a Reddit post.")
207184
parser.add_argument("post_id", help="Reddit post ID (e.g. 1r1zlqx)")
208-
parser.add_argument("--model", default=LLMS_MODEL, help=f"Model name (default: {LLMS_MODEL})")
185+
parser.add_argument("--model", default=LLMS_ANALYTICS_MODEL, help=f"Model name (default: {LLMS_ANALYTICS_MODEL})")
209186
parser.add_argument(
210187
"--max-chars", type=int, default=30000, help="Max chars of comments to send to LLM (default: 30000)"
211188
)

scripts/analyze_tech_article.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,7 @@
2424
import requests
2525
from bs4 import BeautifulSoup, Comment
2626
from markdownify import markdownify as md
27-
from utils import USER_AGENT, parse_json_response
28-
29-
PYTHON = sys.executable
30-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
31-
REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR)) # llms repo root
32-
LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
33-
# LLMS_MODEL = os.getenv("LLMS_MODEL","MiniMax-M2.1")
34-
LLMS_MODEL = os.getenv("LLMS_MODEL", "moonshotai/kimi-k2.5") # moonshotai/kimi-k2.5
27+
from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_ANALYTICS_MODEL, USER_AGENT, parse_json_response
3528

3629
# ── Content Extraction ───────────────────────────────────────────────────────
3730

@@ -297,8 +290,8 @@ def main():
297290
parser.add_argument("url", help="URL of the technology article to analyze")
298291
parser.add_argument(
299292
"--model",
300-
default=LLMS_MODEL,
301-
help=f"Model name (default: $LLMS_MODEL or ${LLMS_MODEL})",
293+
default=LLMS_ANALYTICS_MODEL,
294+
help=f"Model name (default: $LLMS_MODEL or ${LLMS_ANALYTICS_MODEL})",
302295
)
303296
parser.add_argument(
304297
"--max-chars",

scripts/create_post.py

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,7 @@
1616
import aiohttp
1717
from yarl import URL
1818

19-
from utils import COOKIES
20-
21-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
22-
REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR)) # llms repo root
23-
LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
24-
LLMS_MODEL = os.getenv("LLMS_MODEL", "MiniMax-M2.1")
19+
from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_MODEL, COOKIES, parse_json_response
2520

2621
TECHSTACKS_BASE = "https://techstacks.io"
2722
SEARCH_TECH_URL = f"{TECHSTACKS_BASE}/api/QueryTechnology"
@@ -132,31 +127,6 @@ def extract_json(text: str) -> str:
132127
text = re.sub(r"\n```\s*$", "", text)
133128
return text.strip()
134129

135-
136-
def parse_json_response(text):
137-
# Try direct parse first
138-
try:
139-
return json.loads(text)
140-
except json.JSONDecodeError:
141-
pass
142-
143-
# Strip markdown fences
144-
cleaned = re.sub(r"^```(?:json)?\s*", "", text.strip())
145-
cleaned = re.sub(r"\s*```$", "", cleaned)
146-
147-
try:
148-
return json.loads(cleaned)
149-
except json.JSONDecodeError:
150-
pass
151-
152-
# Try to extract JSON object/array
153-
match = re.search(r"(\{[\s\S]*\}|\[[\s\S]*\])", text)
154-
if match:
155-
return json.loads(match.group(1))
156-
157-
raise ValueError("Could not parse JSON from response")
158-
159-
160130
def generate_posts_json(stories: list[dict], model: str) -> list[dict]:
161131
"""Use llms.sh to filter HN stories and generate CreatePost entries."""
162132
stories_text = "\n".join(f"- Title: {s['title']}\n URL: {s['url']}\n Score: {s.get('score', 0)}" for s in stories)

scripts/create_technology.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,10 @@
1111
import sys
1212

1313
import aiohttp
14-
from utils import parse_json_response
1514
from yarl import URL
1615

17-
from utils import create_slug, COOKIES
16+
from utils import TECHSTACKS_BASE, SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_TECH_MODEL, COOKIES, parse_json_response, create_slug
1817

19-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
20-
REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR)) # llms repo root
21-
LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
22-
LLMS_MODEL = os.getenv("LLMS_MODEL", "glm-4.7")
23-
24-
TECHSTACKS_BASE = "https://techstacks.io"
2518
CREATE_URL = f"{TECHSTACKS_BASE}/api/CreateTechnology"
2619

2720
TECHNOLOGY_TIERS = [
@@ -133,7 +126,7 @@ async def create_technology(session: aiohttp.ClientSession, tech: dict) -> dict:
133126
async def main():
134127
parser = argparse.ArgumentParser(description="Create missing technologies on techstacks.io")
135128
parser.add_argument("names", nargs="+", help="Names of the technologies to search for / create")
136-
parser.add_argument("--model", default=LLMS_MODEL, help=f"OpenAI model to use (default: {LLMS_MODEL})")
129+
parser.add_argument("--model", default=LLMS_TECH_MODEL, help=f"OpenAI model to use (default: {LLMS_TECH_MODEL})")
137130
parser.add_argument("--dry-run", action="store_true", help="Generate JSON but don't create the technology")
138131
args = parser.parse_args()
139132

scripts/delete_posts.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
================
66
Usage: delete_posts.py <search>
77
"""
8+
import sys
9+
import requests
10+
from utils import TECHSTACKS_BASE, create_cookie_jar
811

912
async def main():
10-
import sys
11-
import requests
12-
from utils import TECHSTACKS_BASE, create_cookie_jar
1313

1414
if len(sys.argv) < 2:
1515
print("Usage: delete_posts.py <search>")

scripts/process_posts.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,7 @@
1616
import sys
1717
from pathlib import Path
1818

19-
from utils import MIN_HN_POINTS, MIN_REDDIT_POINTS
20-
21-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
22-
PYTHON = sys.executable
23-
19+
from utils import MIN_HN_POINTS, MIN_REDDIT_POINTS, SCRIPT_DIR, PYTHON
2420

2521
def load_done_urls() -> set:
2622
urls = set()

scripts/publish_posts.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,8 @@
1616
from pathlib import Path
1717

1818
import requests
19-
from utils import TECHSTACKS_BASE, create_cookie_jar
19+
from utils import TECHSTACKS_BASE, SCRIPT_DIR, create_cookie_jar
2020

21-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
2221
POSTS_DIR = os.path.join(SCRIPT_DIR, "posts")
2322
COMPLETED_DIR = os.path.join(SCRIPT_DIR, "completed")
2423
FAILED_DIR = os.path.join(SCRIPT_DIR, "failed")

scripts/reddit_top.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pathlib import Path
1111
from urllib.request import Request, urlopen
1212

13-
from utils import MIN_REDDIT_POINTS, TOP_REDDIT_LIMIT, USER_AGENT, create_slug, parse_json_response
13+
from utils import MIN_REDDIT_POINTS, TOP_REDDIT_LIMIT, USER_AGENT, create_slug
1414

1515
SUBREDDITS = [
1616
"r/react",

0 commit comments

Comments
 (0)