Skip to content

Commit 429c9b3

Browse files
vinta and claude committed
feat: generate llms.txt from template and annotate entries with star counts
- Add llms.txt Jinja2 template with a categories_md placeholder
- Extract categories body from README and inject it into the template
- Annotate bullet-entry lines with GitHub star counts (N GitHub stars) for the main index.md and bare numbers for llms.txt
- Add TestAnnotateEntriesWithStars unit tests

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent d9f26a8 commit 429c9b3

3 files changed

Lines changed: 169 additions & 3 deletions

File tree

website/build.py

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors
1515

1616
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
17+
MARKDOWN_LINK_RE = re.compile(r"\[[^\]]+\]\(([^)\s]+)\)")
18+
BULLET_LINE_RE = re.compile(r"^\s*-\s")
1719
SITE_URL = "https://awesome-python.com/"
1820
SITEMAP_URL = f"{SITE_URL}sitemap.xml"
1921
SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"
@@ -104,6 +106,72 @@ def top_level_heading_text(line: str) -> str | None:
104106
return stripped.removeprefix("#").strip().strip("#").strip().strip("*").strip()
105107

106108

109+
LLMS_CATEGORIES_PLACEHOLDER = "{{ categories_md }}"
110+
111+
112+
def extract_categories_body(markdown: str) -> str:
    """Return the text that sits under the `# Categories` top-level heading.

    The heading line itself and any blank lines directly after it are dropped.
    The body runs until the next top-level heading (as recognised by
    `top_level_heading_text`) or the end of the document. Trailing whitespace
    is trimmed and a single newline is appended.

    Returns an empty string when no `Categories` heading is present.
    """
    rows = markdown.splitlines(keepends=True)
    total = len(rows)

    # Phase 1: find the first top-level heading titled "categories" (case-insensitive).
    body_start = None
    for pos, row in enumerate(rows):
        title = top_level_heading_text(row)
        if title is not None and title.lower() == "categories":
            body_start = pos + 1
            break
    if body_start is None:
        return ""

    # Phase 2: skip the blank lines that separate the heading from its content.
    while body_start < total and not rows[body_start].strip():
        body_start += 1

    # Phase 3: the body stops at the next top-level heading, if there is one.
    body_end = total
    for pos in range(body_start, total):
        if top_level_heading_text(rows[pos]) is not None:
            body_end = pos
            break

    return "".join(rows[body_start:body_end]).rstrip() + "\n"
131+
132+
133+
def build_llms_txt(template_text: str, readme_text: str, stars_data: dict[str, dict]) -> str:
    """Produce the llms.txt content from its template and the README.

    The README's Categories body (trailing whitespace removed) is substituted
    for every occurrence of `LLMS_CATEGORIES_PLACEHOLDER` in `template_text`.
    The filled-in text is then passed through `annotate_entries_with_stars`
    with `format_stars=str`, so annotated entries end in a bare number, e.g. `(42)`.
    """
    categories_md = extract_categories_body(readme_text).rstrip()
    filled_template = template_text.replace(LLMS_CATEGORIES_PLACEHOLDER, categories_md)
    return annotate_entries_with_stars(filled_template, stars_data, format_stars=str)
138+
139+
140+
def _default_star_label(count) -> str:
    """Render the default annotation text for a star count."""
    return f"{count} GitHub stars"


def _first_starred_entry(line: str, stars_data: dict[str, dict]) -> dict | None:
    """Return the star-data entry for the first GitHub link in `line` that has one, else None."""
    for link in MARKDOWN_LINK_RE.finditer(line):
        repo_key = extract_github_repo(link.group(1))
        if not repo_key:
            continue
        entry = stars_data.get(repo_key)
        if entry and "stars" in entry:
            return entry
    return None


def annotate_entries_with_stars(
    markdown: str,
    stars_data: dict[str, dict],
    *,
    format_stars=None,
) -> str:
    """Append the star count to bullet entry lines whose first GitHub link has known star data.

    Only lines matching `BULLET_LINE_RE` are considered. For each such line, the
    markdown links are scanned left to right and the first one that resolves
    (via `extract_github_repo`) to a key in `stars_data` with a `"stars"` field
    supplies the count. Lines without such a link are returned unchanged.

    Args:
        markdown: The markdown text to annotate.
        stars_data: Mapping of repo key to an entry dict; the `"stars"` value
            of the entry is what gets formatted.
        format_stars: Callable taking the star count and returning the text
            placed inside the parentheses. Defaults to "{N} GitHub stars".
            Pass `str` for a bare number.

    Returns:
        The markdown with ` (<formatted stars>)` inserted at the end of each
        annotated line, before its line terminator. Line terminators are
        preserved exactly, including `\\r\\n`.
    """
    if format_stars is None:
        format_stars = _default_star_label

    out: list[str] = []
    for line in markdown.splitlines(keepends=True):
        if not BULLET_LINE_RE.match(line):
            out.append(line)
            continue
        entry = _first_starred_entry(line, stars_data)
        if entry is None:
            out.append(line)
            continue
        # Split off the terminator using the same rules that produced `line`.
        # Stripping only "\n" would leave the "\r" of a CRLF ending in the
        # content and put the annotation after it.
        content = line.splitlines()[0]
        ending = line[len(content):]
        out.append(f"{content} ({format_stars(entry['stars'])}){ending}")
    return "".join(out)
173+
174+
107175
def remove_sponsors_section(markdown: str) -> str:
108176
lines = markdown.splitlines(keepends=True)
109177
start_idx = None
@@ -243,11 +311,15 @@ def build(repo_root: Path) -> None:
243311
if static_src.exists():
244312
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
245313

246-
markdown_index = remove_sponsors_section(readme_text)
314+
markdown_index = annotate_entries_with_stars(
315+
remove_sponsors_section(readme_text), stars_data
316+
)
317+
llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
318+
llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
247319
(site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")
248320
write_sitemap_xml(site_dir / "sitemap.xml", [(SITE_URL, build_date.date().isoformat())])
249321
(site_dir / "index.md").write_text(markdown_index, encoding="utf-8")
250-
(site_dir / "llms.txt").write_text(markdown_index, encoding="utf-8")
322+
(site_dir / "llms.txt").write_text(llms_txt, encoding="utf-8")
251323

252324
print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories")
253325
print(f"Total entries: {total_entries}")

website/templates/llms.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Awesome Python
2+
3+
An opinionated guide to the best Python frameworks, libraries, tools, and resources.
4+
5+
Use this curated list when you need to find a high-quality Python library or tool for tasks such as web development, data science, machine learning, AI agents, automation, testing, or DevOps. The trailing number on each entry is its star count on GitHub.
6+
7+
# Categories
8+
9+
{{ categories_md }}

website/tests/test_build.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pathlib import Path
1010

1111
from build import (
12+
annotate_entries_with_stars,
1213
build,
1314
detect_source_type,
1415
extract_entries,
@@ -108,6 +109,16 @@ def _make_repo(self, tmp_path, readme):
108109
"{% endblock %}",
109110
encoding="utf-8",
110111
)
112+
(tpl_dir / "llms.txt").write_text(
113+
"# Awesome Python\n"
114+
"\n"
115+
"Use this list to find Python tools.\n"
116+
"\n"
117+
"# Categories\n"
118+
"\n"
119+
"{{ categories_md }}\n",
120+
encoding="utf-8",
121+
)
111122

112123
def _copy_real_templates(self, tmp_path):
113124
real_tpl = Path(__file__).parent / ".." / "templates"
@@ -223,6 +234,7 @@ def test_build_creates_markdown_alternate_without_sponsors(self, tmp_path):
223234
## Widgets
224235
225236
- [w1](https://example.com) - A widget.
237+
- [w2](https://github.com/owner/w2) - A starred widget.
226238
227239
# Contributing
228240
@@ -231,6 +243,13 @@ def test_build_creates_markdown_alternate_without_sponsors(self, tmp_path):
231243
(tmp_path / "README.md").write_text(readme, encoding="utf-8")
232244
self._copy_real_templates(tmp_path)
233245

246+
data_dir = tmp_path / "website" / "data"
247+
data_dir.mkdir(parents=True)
248+
stars = {
249+
"owner/w2": {"stars": 42, "owner": "owner", "fetched_at": "2026-01-01T00:00:00+00:00"},
250+
}
251+
(data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8")
252+
234253
build(tmp_path)
235254

236255
site = tmp_path / "website" / "output"
@@ -239,13 +258,23 @@ def test_build_creates_markdown_alternate_without_sponsors(self, tmp_path):
239258
llms_txt = (site / "llms.txt").read_text(encoding="utf-8")
240259

241260
assert '<link rel="alternate" type="text/markdown" href="/index.md" />' in index_html
242-
assert index_md == llms_txt
243261
assert index_md.startswith("# Awesome Python\n\nIntro.\n\n# Categories")
244262
assert "# **Sponsors**" not in index_md
245263
assert "Sponsor" not in index_md
246264
assert "SPONSORSHIP.md" not in index_md
247265
assert "## Widgets" in index_md
248266
assert "- [w1](https://example.com) - A widget." in index_md
267+
assert "- [w2](https://github.com/owner/w2) - A starred widget. (42 GitHub stars)" in index_md
268+
269+
assert llms_txt.startswith("# Awesome Python\n")
270+
assert "# Categories" in llms_txt
271+
assert "Use this curated list" in llms_txt
272+
assert "## Widgets" in llms_txt
273+
assert "- [w1](https://example.com) - A widget." in llms_txt
274+
assert "- [w2](https://github.com/owner/w2) - A starred widget. (42)" in llms_txt
275+
assert "{{ categories_md }}" not in llms_txt
276+
assert "# Contributing" not in llms_txt
277+
assert "Help!" not in llms_txt
249278

250279
def test_build_cleans_stale_output(self, tmp_path):
251280
readme = textwrap.dedent("""\
@@ -604,3 +633,59 @@ def test_source_type_detected(self):
604633
categories = [c for g in groups for c in g["categories"]]
605634
entries = extract_entries(categories, groups)
606635
assert entries[0]["source_type"] == "Built-in"
636+
637+
638+
# ---------------------------------------------------------------------------
639+
# annotate_entries_with_stars
640+
# ---------------------------------------------------------------------------
641+
642+
643+
class TestAnnotateEntriesWithStars:
    """Unit tests for `annotate_entries_with_stars` using its default star label."""

    def test_appends_star_count_to_bullet(self):
        """A bullet with a known GitHub repo gets the count appended before the newline."""
        source = "- [foo](https://github.com/owner/foo) - A foo.\n"
        star_counts = {"owner/foo": {"stars": 123, "owner": "owner"}}
        expected = "- [foo](https://github.com/owner/foo) - A foo. (123 GitHub stars)\n"
        assert annotate_entries_with_stars(source, star_counts) == expected

    def test_uses_first_github_link(self):
        """When several repos on one line have data, the leftmost one wins."""
        source = (
            "- [foo](https://github.com/owner/foo) - A foo. "
            "Also [bar](https://github.com/owner/bar).\n"
        )
        star_counts = {
            "owner/foo": {"stars": 10, "owner": "owner"},
            "owner/bar": {"stars": 99, "owner": "owner"},
        }
        expected = (
            "- [foo](https://github.com/owner/foo) - A foo. "
            "Also [bar](https://github.com/owner/bar). (10 GitHub stars)\n"
        )
        assert annotate_entries_with_stars(source, star_counts) == expected

    def test_skips_entries_without_star_data(self):
        """A GitHub bullet with no matching star data is returned untouched."""
        source = "- [foo](https://github.com/owner/foo) - A foo.\n"
        assert annotate_entries_with_stars(source, {}) == source

    def test_skips_non_github_links(self):
        """Bullets that link somewhere other than GitHub are returned untouched."""
        source = "- [foo](https://example.com) - A foo.\n"
        star_counts = {"owner/foo": {"stars": 1, "owner": "owner"}}
        assert annotate_entries_with_stars(source, star_counts) == source

    def test_skips_non_bullet_lines(self):
        """Prose lines are never annotated, even when they link to a known repo."""
        source = "See [foo](https://github.com/owner/foo) for details.\n"
        star_counts = {"owner/foo": {"stars": 1, "owner": "owner"}}
        assert annotate_entries_with_stars(source, star_counts) == source

    def test_handles_indented_bullets(self):
        """Leading whitespace before the bullet dash does not prevent annotation."""
        source = " - [foo](https://github.com/owner/foo)\n"
        star_counts = {"owner/foo": {"stars": 7, "owner": "owner"}}
        expected = " - [foo](https://github.com/owner/foo) (7 GitHub stars)\n"
        assert annotate_entries_with_stars(source, star_counts) == expected

    def test_preserves_lines_without_trailing_newline(self):
        """A final line lacking a newline is annotated without one being added."""
        source = "- [foo](https://github.com/owner/foo) - A foo."
        star_counts = {"owner/foo": {"stars": 5, "owner": "owner"}}
        expected = "- [foo](https://github.com/owner/foo) - A foo. (5 GitHub stars)"
        assert annotate_entries_with_stars(source, star_counts) == expected

0 commit comments

Comments
 (0)