Merged
23 commits
b897676
refactor(fetch_github_stars): remove redundant early-return guard in …
vinta Apr 19, 2026
e47d229
refactor(readme_parser): consolidate state reset to tail of flush_group
vinta Apr 19, 2026
a358d45
refactor: use datetime.UTC alias instead of timezone.utc
vinta Apr 19, 2026
b9236c4
refactor(fetch_github_stars): drop unnecessary keyword-only marker on…
vinta Apr 19, 2026
c85f81b
refactor(build): accept Path directly in build() signature
vinta Apr 19, 2026
7f4a163
refactor(build): tighten extract_entries parameter types to ParsedSec…
vinta Apr 19, 2026
7e7de19
refactor(build): remove StarData TypedDict, loosen load_stars return …
vinta Apr 19, 2026
e0b0dc9
refactor(readme_parser): add _href helper to narrow attrGet return type
vinta Apr 19, 2026
486fbf2
refactor(readme_parser): replace _find_first_link with _find_child(in…
vinta Apr 19, 2026
85b55ef
refactor(readme_parser): inline _is_leading_link at its call site
vinta Apr 19, 2026
39b65bc
refactor(build): inline format_stars_short into its call site
vinta Apr 19, 2026
95115f7
refactor(fetch_github_stars): replace manual slice loop with itertool…
vinta Apr 19, 2026
6ae7c89
refactor: replace manual total_seconds()/3600 with timedelta comparison
vinta Apr 19, 2026
0630ee9
refactor(build): flatten extract_entries and annotate result dict
vinta Apr 19, 2026
420bf8c
refactor(readme_parser): collapse render_inline_html/text into _rende…
vinta Apr 19, 2026
9293696
refactor(readme_parser): fuse _parse_sponsor_item into single pass
vinta Apr 19, 2026
f10337b
refactor(tests): modernize test_readme_parser to use pathlib.Path
vinta Apr 19, 2026
257b69a
style(sponsors): bump section-label to --text-lg within sponsor scope
vinta Apr 19, 2026
f3c8377
chore: remove arrow from 'Become a sponsor' link and its CSS rules
vinta Apr 19, 2026
19496c2
refactor(css): replace sponsor-become border underline with text-deco…
vinta Apr 19, 2026
7625d1f
style: use --accent-underline on sponsor link underline
vinta Apr 19, 2026
674c169
fix(css): scope --accent-underline to sponsor-become hover; restore -…
vinta Apr 19, 2026
35aee20
remove color
vinta Apr 19, 2026
94 changes: 38 additions & 56 deletions website/build.py
@@ -4,20 +4,12 @@
 import json
 import re
 import shutil
-from datetime import datetime, timezone
+from datetime import UTC, datetime
 from pathlib import Path
-from typing import TypedDict
+from typing import Any
 
 from jinja2 import Environment, FileSystemLoader
-from readme_parser import parse_readme, parse_sponsors
-
-
-class StarData(TypedDict):
-    stars: int
-    owner: str
-    last_commit_at: str
-    fetched_at: str
-
+from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
 
 GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
 
@@ -46,7 +38,7 @@ def extract_github_repo(url: str) -> str | None:
     return m.group(1) if m else None
 
 
-def load_stars(path: Path) -> dict[str, StarData]:
+def load_stars(path: Path) -> dict[str, dict]:
     """Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
     if path.exists():
         try:
@@ -76,68 +68,55 @@ def sort_key(entry: dict) -> tuple[int, int, int, str]:
 
 
 def extract_entries(
-    categories: list[dict],
-    groups: list[dict],
+    categories: list[ParsedSection],
+    groups: list[ParsedGroup],
 ) -> list[dict]:
     """Flatten categories into individual library entries for table display.
 
     Entries appearing in multiple categories are merged into a single entry
     with lists of categories and groups.
     """
-    cat_to_group: dict[str, str] = {}
-    for group in groups:
-        for cat in group["categories"]:
-            cat_to_group[cat["name"]] = group["name"]
+    cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
 
-    seen: dict[tuple[str, str], dict] = {}  # (url, name) -> entry
-    entries: list[dict] = []
+    seen: dict[tuple[str, str], dict[str, Any]] = {}  # (url, name) -> entry
+    entries: list[dict[str, Any]] = []
     for cat in categories:
         group_name = cat_to_group.get(cat["name"], "Other")
         for entry in cat["entries"]:
-            url = entry["url"]
-            key = (url, entry["name"])
-            if key in seen:
-                existing = seen[key]
-                if cat["name"] not in existing["categories"]:
-                    existing["categories"].append(cat["name"])
-                if group_name not in existing["groups"]:
-                    existing["groups"].append(group_name)
-                subcat = entry["subcategory"]
-                if subcat:
-                    scoped = f"{cat['name']} > {subcat}"
-                    if not any(s["value"] == scoped for s in existing["subcategories"]):
-                        existing["subcategories"].append({"name": subcat, "value": scoped})
-            else:
-                merged = {
+            key = (entry["url"], entry["name"])
+            existing: dict[str, Any] | None = seen.get(key)
+            if existing is None:
+                existing = {
                     "name": entry["name"],
-                    "url": url,
+                    "url": entry["url"],
                     "description": entry["description"],
-                    "categories": [cat["name"]],
-                    "groups": [group_name],
-                    "subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [],
+                    "categories": [],
+                    "groups": [],
+                    "subcategories": [],
                     "stars": None,
                     "owner": None,
                     "last_commit_at": None,
-                    "source_type": detect_source_type(url),
+                    "source_type": detect_source_type(entry["url"]),
                     "also_see": entry["also_see"],
                 }
-                seen[key] = merged
-                entries.append(merged)
+                seen[key] = existing
+                entries.append(existing)
+            if cat["name"] not in existing["categories"]:
+                existing["categories"].append(cat["name"])
+            if group_name not in existing["groups"]:
+                existing["groups"].append(group_name)
+            subcat = entry["subcategory"]
+            if subcat:
+                scoped = f"{cat['name']} > {subcat}"
+                if not any(s["value"] == scoped for s in existing["subcategories"]):
+                    existing["subcategories"].append({"name": subcat, "value": scoped})
     return entries
 
 
-def format_stars_short(stars: int) -> str:
-    """Format star count as compact string like '230k'."""
-    if stars >= 1000:
-        return f"{stars // 1000}k"
-    return str(stars)
-
-
-def build(repo_root: str) -> None:
+def build(repo_root: Path) -> None:
     """Main build: parse README, render single-page HTML via Jinja2 templates."""
-    repo = Path(repo_root)
-    website = repo / "website"
-    readme_text = (repo / "README.md").read_text(encoding="utf-8")
+    website = repo_root / "website"
+    readme_text = (repo_root / "README.md").read_text(encoding="utf-8")
 
     subtitle = ""
     for line in readme_text.split("\n"):
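The rewrite replaces the if/else merge with a create-once-then-accumulate pass: a new entry is registered on first sight, and the category/group/subcategory accumulation runs unconditionally for both first and repeat sightings. A minimal standalone sketch of that pattern, with illustrative keys and a hypothetical helper name not taken from the repo:

from typing import Any

def merge_tagged(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse duplicates by key, accumulating tags across sightings."""
    seen: dict[str, dict[str, Any]] = {}
    merged: list[dict[str, Any]] = []
    for item in items:
        existing = seen.get(item["key"])
        if existing is None:
            # First sighting: register an entry with empty accumulators.
            existing = {"key": item["key"], "tags": []}
            seen[item["key"]] = existing
            merged.append(existing)
        # Accumulation runs on every sighting, first or repeat.
        if item["tag"] not in existing["tags"]:
            existing["tags"].append(item["tag"])
    return merged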
@@ -156,7 +135,10 @@ def build(repo_root: str) -> None:
     stars_data = load_stars(website / "data" / "github_stars.json")
 
     repo_self = stars_data.get("vinta/awesome-python", {})
-    repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None
+    repo_stars = None
+    if "stars" in repo_self:
+        stars_val = repo_self["stars"]
+        repo_stars = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
 
     for entry in entries:
         repo_key = extract_github_repo(entry["url"])
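As a quick sanity check, the inlined expression reproduces the removed format_stars_short helper, assuming integer star counts:

for stars_val in (999, 1000, 230_500):
    label = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val)
    print(stars_val, "->", label)  # 999 -> 999, 1000 -> 1k, 230500 -> 230k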
@@ -189,7 +171,7 @@ def build(repo_root: str) -> None:
             total_entries=total_entries,
             total_categories=len(categories),
             repo_stars=repo_stars,
-            build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"),
+            build_date=datetime.now(UTC).strftime("%B %d, %Y"),
             sponsors=sponsors,
         ),
         encoding="utf-8",
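datetime.UTC is an alias for timezone.utc introduced in Python 3.11, so this change (and the matching one in fetch_github_stars.py) is purely cosmetic:

from datetime import UTC, datetime, timezone

assert UTC is timezone.utc  # same object; UTC is the Python 3.11+ alias
print(datetime.now(UTC).strftime("%B %d, %Y"))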
@@ -208,4 +190,4 @@ def build(repo_root: str) -> None:
 
 
 if __name__ == "__main__":
-    build(str(Path(__file__).parent.parent))
+    build(Path(__file__).parent.parent)
26 changes: 11 additions & 15 deletions website/fetch_github_stars.py
@@ -5,7 +5,9 @@
 import os
 import re
 import sys
-from datetime import datetime, timezone
+from collections.abc import Sequence
+from datetime import UTC, datetime, timedelta
+from itertools import batched
 from pathlib import Path
 
 import httpx
@@ -44,10 +46,8 @@ def save_cache(cache: dict) -> None:
     )
 
 
-def build_graphql_query(repos: list[str]) -> str:
+def build_graphql_query(repos: Sequence[str]) -> str:
     """Build a GraphQL query with aliases for up to 100 repos."""
-    if not repos:
-        return ""
     parts = []
     for i, repo in enumerate(repos):
         owner, name = repo.split("/", 1)
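Dropping the empty-repos guard is presumably safe because itertools.batched never yields an empty batch, so the function is never called with no repos. For context, a minimal sketch of the aliasing idiom the query builder relies on: each repo gets its own alias so a single GraphQL request covers a whole batch (field selection abbreviated, names illustrative):

def build_query(repos):
    parts = []
    for i, repo in enumerate(repos):
        owner, name = repo.split("/", 1)
        # One alias per repository lets a single query fetch many repos.
        parts.append(f'r{i}: repository(owner: "{owner}", name: "{name}") {{ stargazerCount }}')
    return "query {\n" + "\n".join(parts) + "\n}"

print(build_query(["vinta/awesome-python"]))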
@@ -64,7 +64,7 @@
 
 def parse_graphql_response(
     data: dict,
-    repos: list[str],
+    repos: Sequence[str],
 ) -> dict[str, dict]:
     """Parse GraphQL response into {owner/repo: {stars, owner}} dict."""
     result = {}
@@ -82,9 +82,7 @@
     return result
 
 
-def fetch_batch(
-    repos: list[str], *, client: httpx.Client,
-) -> dict[str, dict]:
+def fetch_batch(repos: Sequence[str], client: httpx.Client) -> dict[str, dict]:
     """Fetch star data for a batch of repos via GitHub GraphQL API."""
     query = build_graphql_query(repos)
     if not query:
@@ -112,7 +110,7 @@ def main() -> None:
     print(f"Found {len(current_repos)} GitHub repos in README.md")
 
     cache = load_stars(CACHE_FILE)
-    now = datetime.now(timezone.utc)
+    now = datetime.now(UTC)
 
     # Prune entries not in current README
     pruned = {k: v for k, v in cache.items() if k in current_repos}
@@ -121,13 +119,13 @@
     cache = pruned
 
     # Determine which repos need fetching (missing or stale)
+    max_age = timedelta(hours=CACHE_MAX_AGE_HOURS)
     to_fetch = []
     for repo in sorted(current_repos):
         entry = cache.get(repo)
         if entry and "fetched_at" in entry:
             fetched = datetime.fromisoformat(entry["fetched_at"])
-            age_hours = (now - fetched).total_seconds() / 3600
-            if age_hours < CACHE_MAX_AGE_HOURS:
+            if now - fetched < max_age:
                 continue
         to_fetch.append(repo)
 
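Comparing timedelta objects directly reads better than converting to hours by hand and avoids unit-conversion slips. A small illustration, with a made-up timestamp:

from datetime import UTC, datetime, timedelta

max_age = timedelta(hours=24)
fetched = datetime.fromisoformat("2026-04-18T12:00:00+00:00")
if datetime.now(UTC) - fetched < max_age:
    print("cache entry is still fresh")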
@@ -150,13 +148,11 @@ def main() -> None:
         transport=httpx.HTTPTransport(retries=2),
         timeout=30,
     ) as client:
-        for i in range(0, len(to_fetch), BATCH_SIZE):
-            batch = to_fetch[i : i + BATCH_SIZE]
-            batch_num = i // BATCH_SIZE + 1
+        for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1):
             print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...")
 
             try:
-                results = fetch_batch(batch, client=client)
+                results = fetch_batch(batch, client)
             except httpx.HTTPStatusError as e:
                 print(f"HTTP error {e.response.status_code}", file=sys.stderr)
                 if e.response.status_code == 401:
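itertools.batched (Python 3.12+) yields successive non-empty tuples, which removes the manual slice arithmetic and also motivates the switch to Sequence[str] in the helper signatures: tuples are Sequences but not lists. Roughly:

from itertools import batched

for batch_num, batch in enumerate(batched(["a", "b", "c", "d", "e"], 2), 1):
    print(batch_num, batch)  # 1 ('a', 'b') / 2 ('c', 'd') / 3 ('e',)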