Move shared config constants and the duplicated parse_json_response helper into utils

mythz · mythz · commit 32a0eec723e3 · 2026-02-19T10:20:43.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,7 @@
 ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
 
 # Custom
+.DS_Store
 chat*.json
 QueryTechnology.json
 hn_top.json
diff --git a/scripts/analyze_hn_comments.py b/scripts/analyze_hn_comments.py
@@ -25,12 +25,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import requests
-from utils import USER_AGENT
-
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))
-LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
-LLMS_MODEL = os.getenv("LLMS_MODEL", "moonshotai/kimi-k2.5")
+from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_ANALYTICS_MODEL, USER_AGENT, parse_json_response
 
 HN_API = "https://hacker-news.firebaseio.com/v0/item/{}.json"
 SESSION = requests.Session()
@@ -200,25 +195,6 @@ def comments_to_text(comments: list[dict], max_comments: int = 200, max_chars: i
 - Keep the total output under 500 words
 - Return ONLY valid JSON"""
 
-
-def parse_json_response(text: str) -> dict:
-    """Parse JSON from an LLM response."""
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-    cleaned = re.sub(r"^```(?:json)?\s*", "", text.strip())
-    cleaned = re.sub(r"\s*```$", "", cleaned)
-    try:
-        return json.loads(cleaned)
-    except json.JSONDecodeError:
-        pass
-    match = re.search(r"(\{[\s\S]*\})", text)
-    if match:
-        return json.loads(match.group(1))
-    raise ValueError("Could not parse JSON from LLM response")
-
-
 def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
     """Use LLM to generate sentiment analysis markdown."""
     user_message = f"Post Title: {post_title}\n\n--- COMMENTS ---\n{comments_text}"
@@ -260,7 +236,7 @@ def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
 def main():
     parser = argparse.ArgumentParser(description="Analyze comments from a Hacker News post.")
     parser.add_argument("url", help="HN comments URL or item ID (e.g. https://news.ycombinator.com/item?id=46978710)")
-    parser.add_argument("--model", default=LLMS_MODEL, help=f"Model name (default: {LLMS_MODEL})")
+    parser.add_argument("--model", default=LLMS_ANALYTICS_MODEL, help=f"Model name (default: {LLMS_ANALYTICS_MODEL})")
     parser.add_argument(
         "--max-chars", type=int, default=30000, help="Max chars of comments to send to LLM (default: 30000)"
     )
diff --git a/scripts/analyze_reddit_comments.py b/scripts/analyze_reddit_comments.py
@@ -20,12 +20,7 @@
 import sys
 from urllib.request import urlopen, Request
 
-from utils import USER_AGENT
-
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))
-LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
-LLMS_MODEL = os.getenv("LLMS_MODEL", "moonshotai/kimi-k2.5")
+from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_ANALYTICS_MODEL, USER_AGENT, parse_json_response
 
 def fetch_reddit_comments(comments_url: str) -> tuple[dict, list[dict]]:
     """Fetch post data and comment trees from a Reddit comments URL.
@@ -146,24 +141,6 @@ def comments_to_text(comments: list[dict], max_comments: int = 200, max_chars: i
 - Return ONLY valid JSON"""
 
 
-def parse_json_response(text: str) -> dict:
-    """Parse JSON from an LLM response."""
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-    cleaned = re.sub(r"^```(?:json)?\s*", "", text.strip())
-    cleaned = re.sub(r"\s*```$", "", cleaned)
-    try:
-        return json.loads(cleaned)
-    except json.JSONDecodeError:
-        pass
-    match = re.search(r"(\{[\s\S]*\})", text)
-    if match:
-        return json.loads(match.group(1))
-    raise ValueError("Could not parse JSON from LLM response")
-
-
 def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
     """Use LLM to generate sentiment analysis markdown."""
     user_message = f"Post Title: {post_title}\n\n--- COMMENTS ---\n{comments_text}"
@@ -205,7 +182,7 @@ def analyze_sentiment(post_title: str, comments_text: str, model: str) -> str:
 def main():
     parser = argparse.ArgumentParser(description="Analyze comments from a Reddit post.")
     parser.add_argument("post_id", help="Reddit post ID (e.g. 1r1zlqx)")
-    parser.add_argument("--model", default=LLMS_MODEL, help=f"Model name (default: {LLMS_MODEL})")
+    parser.add_argument("--model", default=LLMS_ANALYTICS_MODEL, help=f"Model name (default: {LLMS_ANALYTICS_MODEL})")
     parser.add_argument(
         "--max-chars", type=int, default=30000, help="Max chars of comments to send to LLM (default: 30000)"
     )
diff --git a/scripts/analyze_tech_article.py b/scripts/analyze_tech_article.py
@@ -24,14 +24,7 @@
 import requests
 from bs4 import BeautifulSoup, Comment
 from markdownify import markdownify as md
-from utils import USER_AGENT, parse_json_response
-
-PYTHON = sys.executable
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))  # llms repo root
-LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
-# LLMS_MODEL = os.getenv("LLMS_MODEL","MiniMax-M2.1")
-LLMS_MODEL = os.getenv("LLMS_MODEL", "moonshotai/kimi-k2.5")  # moonshotai/kimi-k2.5
+from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_ANALYTICS_MODEL, USER_AGENT, parse_json_response
 
 # ── Content Extraction ───────────────────────────────────────────────────────
 
@@ -297,8 +290,8 @@ def main():
     parser.add_argument("url", help="URL of the technology article to analyze")
     parser.add_argument(
         "--model",
-        default=LLMS_MODEL,
-        help=f"Model name (default: $LLMS_MODEL or ${LLMS_MODEL})",
+        default=LLMS_ANALYTICS_MODEL,  # resolved in utils from $LLMS_ANALYTICS_MODEL, else the built-in fallback
+        help=f"Model name (default: $LLMS_ANALYTICS_MODEL or {LLMS_ANALYTICS_MODEL})",
     )
     parser.add_argument(
         "--max-chars",
diff --git a/scripts/create_post.py b/scripts/create_post.py
@@ -16,12 +16,7 @@
 import aiohttp
 from yarl import URL
 
-from utils import COOKIES
-
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))  # llms repo root
-LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
-LLMS_MODEL = os.getenv("LLMS_MODEL", "MiniMax-M2.1")
+from utils import SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_MODEL, COOKIES, parse_json_response
 
 TECHSTACKS_BASE = "https://techstacks.io"
 SEARCH_TECH_URL = f"{TECHSTACKS_BASE}/api/QueryTechnology"
@@ -132,31 +127,6 @@ def extract_json(text: str) -> str:
         text = re.sub(r"\n```\s*$", "", text)
     return text.strip()
 
-
-def parse_json_response(text):
-    # Try direct parse first
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-
-    # Strip markdown fences
-    cleaned = re.sub(r"^```(?:json)?\s*", "", text.strip())
-    cleaned = re.sub(r"\s*```$", "", cleaned)
-
-    try:
-        return json.loads(cleaned)
-    except json.JSONDecodeError:
-        pass
-
-    # Try to extract JSON object/array
-    match = re.search(r"(\{[\s\S]*\}|\[[\s\S]*\])", text)
-    if match:
-        return json.loads(match.group(1))
-
-    raise ValueError("Could not parse JSON from response")
-
-
 def generate_posts_json(stories: list[dict], model: str) -> list[dict]:
     """Use llms.sh to filter HN stories and generate CreatePost entries."""
     stories_text = "\n".join(f"- Title: {s['title']}\n  URL: {s['url']}\n  Score: {s.get('score', 0)}" for s in stories)
diff --git a/scripts/create_technology.py b/scripts/create_technology.py
@@ -11,17 +11,10 @@
 import sys
 
 import aiohttp
-from utils import parse_json_response
 from yarl import URL
 
-from utils import create_slug, COOKIES
+from utils import TECHSTACKS_BASE, SCRIPT_DIR, REPO_ROOT, LLMS_SH, LLMS_TECH_MODEL, COOKIES, parse_json_response, create_slug
 
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))  # llms repo root
-LLMS_SH = os.path.join(REPO_ROOT, "llms.sh")
-LLMS_MODEL = os.getenv("LLMS_MODEL", "glm-4.7")
-
-TECHSTACKS_BASE = "https://techstacks.io"
 CREATE_URL = f"{TECHSTACKS_BASE}/api/CreateTechnology"
 
 TECHNOLOGY_TIERS = [
@@ -133,7 +126,7 @@ async def create_technology(session: aiohttp.ClientSession, tech: dict) -> dict:
 async def main():
     parser = argparse.ArgumentParser(description="Create missing technologies on techstacks.io")
     parser.add_argument("names", nargs="+", help="Names of the technologies to search for / create")
-    parser.add_argument("--model", default=LLMS_MODEL, help=f"OpenAI model to use (default: {LLMS_MODEL})")
+    parser.add_argument("--model", default=LLMS_TECH_MODEL, help=f"OpenAI model to use (default: {LLMS_TECH_MODEL})")
     parser.add_argument("--dry-run", action="store_true", help="Generate JSON but don't create the technology")
     args = parser.parse_args()
 
diff --git a/scripts/delete_posts.py b/scripts/delete_posts.py
@@ -5,11 +5,11 @@
 ================
 Usage: delete_posts.py <search>
 """
+import sys
+import requests
+from utils import TECHSTACKS_BASE, create_cookie_jar
 
 async def main():
-    import sys
-    import requests
-    from utils import TECHSTACKS_BASE, create_cookie_jar
 
     if len(sys.argv) < 2:
         print("Usage: delete_posts.py <search>")
diff --git a/scripts/process_posts.py b/scripts/process_posts.py
@@ -16,11 +16,7 @@
 import sys
 from pathlib import Path
 
-from utils import MIN_HN_POINTS, MIN_REDDIT_POINTS
-
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
-PYTHON = sys.executable
-
+from utils import MIN_HN_POINTS, MIN_REDDIT_POINTS, SCRIPT_DIR, PYTHON
 
 def load_done_urls() -> set:
     urls = set()
diff --git a/scripts/publish_posts.py b/scripts/publish_posts.py
@@ -16,9 +16,8 @@
 from pathlib import Path
 
 import requests
-from utils import TECHSTACKS_BASE, create_cookie_jar
+from utils import TECHSTACKS_BASE, SCRIPT_DIR, create_cookie_jar
 
-SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 POSTS_DIR = os.path.join(SCRIPT_DIR, "posts")
 COMPLETED_DIR = os.path.join(SCRIPT_DIR, "completed")
 FAILED_DIR = os.path.join(SCRIPT_DIR, "failed")
diff --git a/scripts/reddit_top.py b/scripts/reddit_top.py
@@ -10,7 +10,7 @@
 from pathlib import Path
 from urllib.request import Request, urlopen
 
-from utils import MIN_REDDIT_POINTS, TOP_REDDIT_LIMIT, USER_AGENT, create_slug, parse_json_response
+from utils import MIN_REDDIT_POINTS, TOP_REDDIT_LIMIT, USER_AGENT, create_slug
 
 SUBREDDITS = [
     "r/react",
diff --git a/scripts/utils.py b/scripts/utils.py
@@ -2,6 +2,7 @@
 import os
 import re
 import sys
+import shutil
 from urllib.parse import urlparse
 
 import requests
@@ -18,6 +19,16 @@
 MIN_REDDIT_POINTS = 200
 TOP_REDDIT_LIMIT = 100
 
+PYTHON = sys.executable  # path of the Python interpreter running this module
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))  # directory that contains utils.py
+REPO_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR))  # llms repo root (two levels above SCRIPT_DIR)
+LLMS_SH = shutil.which("llms")  # path of the `llms` executable found on PATH, or None when absent
+LLMS_MODEL = os.getenv("LLMS_MODEL", "MiniMax-M2.1")  # imported by create_post.py; override via $LLMS_MODEL
+LLMS_TECH_MODEL = os.getenv("LLMS_TECH_MODEL", "glm-4.7")  # --model default in create_technology.py
+LLMS_ANALYTICS_MODEL = os.getenv("LLMS_ANALYTICS_MODEL", "moonshotai/kimi-k2.5")  # --model default in the analyze_* scripts
+
+if not LLMS_SH:  # NOTE(review): runs at import time, so scripts importing only non-LLM names (e.g. delete_posts.py) also fail without llms - confirm intended
+    raise RuntimeError("llms command not found in PATH. Please ensure llms is installed and available.")
 
 def create_cookie_jar():
     parsed = urlparse(TECHSTACKS_BASE)