Skip to content

Commit 443b352

Browse files
committed
feat: add --publish flag for understand-quickly registry integration
Adds a `--publish` flag (opt-in) that, after tutorial generation, emits a small `generic@1` knowledge-graph projection of the tutorial (abstractions as nodes; AnalyzeRelationships output + chapter ordering as edges) at `<output>/<project>/tutorial.json` with metadata.{tool, tool_version, generated_at, commit}. When `UNDERSTAND_QUICKLY_TOKEN` is set, also fires a `repository_dispatch` at `looptech-ai/understand-quickly` so the registry resyncs the entry. Without the token, only the local file is written — no network call, no failure. New module `utils/uq_publish.py` is stdlib-only (urllib, subprocess, json) — no new dependencies. Includes two unit tests using stdlib `unittest`. Spec: https://github.com/looptech-ai/understand-quickly/blob/main/docs/spec/code-graph-protocol.md Action: https://github.com/looptech-ai/uq-publish-action
1 parent c8a8ca1 commit 443b352

5 files changed

Lines changed: 340 additions & 0 deletions

File tree

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,26 @@ To run this project in a Docker container, you'll need to pass your API keys as
155155
```
156156
</details>
157157
158+
## 📡 Publish to understand-quickly (opt-in)
159+
160+
Add `--publish` to land the generated tutorial in [`looptech-ai/understand-quickly`](https://github.com/looptech-ai/understand-quickly), a public registry of code-knowledge graphs that ships an MCP server. The flag emits a small `generic@1` JSON projection of the tutorial (abstractions as nodes; relationships and chapter ordering as edges) at `<output>/<project>/tutorial.json` with `metadata.{tool, tool_version, generated_at, commit}`. If `UNDERSTAND_QUICKLY_TOKEN` is set, it also fires a `repository_dispatch` so the registry resyncs the entry.
161+
162+
```bash
163+
python main.py --repo https://github.com/example/demo --publish
164+
```
165+
166+
Without the token, only the local file is written. The drop-in CI step is the [`looptech-ai/uq-publish-action`](https://github.com/looptech-ai/uq-publish-action):
167+
168+
```yaml
169+
- uses: looptech-ai/uq-publish-action@v0.1.0
170+
with:
171+
graph-path: 'output/<project>/tutorial.json'
172+
format: 'generic@1'
173+
token: ${{ secrets.UNDERSTAND_QUICKLY_TOKEN }}
174+
```
175+
176+
Submitting via `--publish` is governed by the [Understand-Quickly Data License 1.0](https://github.com/looptech-ai/understand-quickly/blob/main/DATA-LICENSE.md). It is opt-in.
177+
158178
## 💡 Development Tutorial
159179
160180
- I built using [**Agentic Coding**](https://zacharyhuang.substack.com/p/agentic-coding-the-most-fun-way-to), the fastest development paradigm, where humans simply [design](docs/design.md) and agents [code](flow.py).

main.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,15 @@ def main():
5656
parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
5757
# Add max_abstraction_num parameter to control the number of abstractions
5858
parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
59+
# Opt-in publish to the understand-quickly registry of code-knowledge graphs.
60+
# https://github.com/looptech-ai/understand-quickly
61+
parser.add_argument(
62+
"--publish",
63+
action="store_true",
64+
help="Emit a generic@1 knowledge-graph projection of the tutorial and (if "
65+
"UNDERSTAND_QUICKLY_TOKEN is set) dispatch it to the understand-quickly "
66+
"registry. Opt-in; default behavior is unchanged.",
67+
)
5968

6069
args = parser.parse_args()
6170

@@ -88,6 +97,9 @@ def main():
8897
# Add max_abstraction_num parameter
8998
"max_abstraction_num": args.max_abstractions,
9099

100+
# Opt-in publish to understand-quickly (looptech-ai/understand-quickly).
101+
"publish_to_uq": args.publish,
102+
91103
# Outputs will be populated by the nodes
92104
"files": [],
93105
"abstractions": [],

nodes.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -878,3 +878,30 @@ def exec(self, prep_res):
878878
def post(self, shared, prep_res, exec_res):
    """Record the tutorial output directory and optionally publish a graph.

    Stores ``exec_res`` under ``shared["final_output_dir"]`` and prints a
    completion message. When ``shared["publish_to_uq"]`` is truthy, a
    ``generic@1`` graph is built from the shared state and handed to
    ``publish``, targeting ``<exec_res>/tutorial.json``.
    """
    shared["final_output_dir"] = exec_res  # Store the output path
    print(f"\nTutorial generation complete! Files are in: {exec_res}")

    # Publishing is opt-in; without the flag there is nothing more to do.
    if not shared.get("publish_to_uq"):
        return

    # Any failure below is reported as a warning only: the markdown output
    # has already been written, so publishing must never break the run.
    try:
        from pathlib import Path
        from utils.uq_publish import build_generic_graph, publish

        local_dir = shared.get("local_dir")
        source_root = Path(local_dir).resolve() if local_dir else None
        remote_url = shared.get("repo_url")

        tutorial_graph = build_generic_graph(
            project_name=shared["project_name"],
            abstractions=shared.get("abstractions", []),
            chapter_order=shared.get("chapter_order", []),
            relationships=shared.get("relationships", {}),
            repo_url=remote_url,
            source_dir=source_root,
        )
        publish(
            tutorial_graph,
            Path(exec_res) / "tutorial.json",
            repo_url=remote_url,
            source_dir=source_root,
        )
    except Exception as exc:
        print(f"[uq-publish] warning: {exc}")

tests/test_uq_publish.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""Tests for utils/uq_publish.py — opt-in understand-quickly publish."""
2+
from __future__ import annotations
3+
4+
import json
5+
import os
6+
import sys
7+
import unittest
8+
from pathlib import Path
9+
from unittest import mock
10+
11+
# Add project root to path so `from utils.uq_publish import ...` works
12+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
13+
14+
from utils.uq_publish import build_generic_graph, publish, TOKEN_ENV # noqa: E402
15+
16+
17+
# Two toy abstractions used as fixture input for the graph builder.
SAMPLE_ABSTRACTIONS = [
    dict(name="Flow", description="Pipeline orchestrator", files=["flow.py"]),
    dict(name="Node", description="Unit of work", files=["nodes.py"]),
]

# A single relationship between the two fixture abstractions; "from"/"to"
# are indices into SAMPLE_ABSTRACTIONS.
SAMPLE_RELATIONSHIPS = {
    "summary": "PocketFlow runs nodes in a flow.",
    "details": [
        {"from": 0, "to": 1, "label": "contains"},
    ],
}
25+
26+
27+
class BuildGenericGraphTests(unittest.TestCase):
    """Checks the structure of the graph returned by ``build_generic_graph``."""

    def test_emits_generic_at_1_with_metadata(self) -> None:
        """The fixture input yields a generic@1 graph with 2 nodes and 2 edges."""
        builder_kwargs = dict(
            project_name="demo",
            abstractions=SAMPLE_ABSTRACTIONS,
            chapter_order=[0, 1],
            relationships=SAMPLE_RELATIONSHIPS,
            repo_url="https://github.com/example/demo",
            source_dir=None,
        )
        graph = build_generic_graph(**builder_kwargs)

        self.assertEqual(graph["schema"], "generic@1")

        metadata = graph["metadata"]
        self.assertEqual(metadata["tool"], "pocketflow-tutorial-codebase-knowledge")
        self.assertEqual(metadata["project_name"], "demo")
        self.assertTrue(metadata["generated_at"].endswith("Z"))

        # Two abstractions -> two nodes; one relationship + one chapter-order edge.
        self.assertEqual(len(graph["nodes"]), 2)
        self.assertEqual(len(graph["edges"]), 2)

        edge_kinds = [edge["kind"] for edge in graph["edges"]]
        edge_kinds.sort()
        self.assertEqual(edge_kinds, ["next_chapter", "relationship"])
47+
48+
49+
class PublishTests(unittest.TestCase):
    """Exercises ``publish`` when no registry token is present."""

    def test_no_token_writes_file_and_skips_dispatch(self) -> None:
        """Without the token the graph file is written and nothing is dispatched."""
        import tempfile

        graph = build_generic_graph(
            project_name="demo",
            abstractions=SAMPLE_ABSTRACTIONS,
            chapter_order=[0, 1],
            relationships=SAMPLE_RELATIONSHIPS,
            repo_url=None,
            source_dir=None,
        )

        # Copy of the current environment with the token variable removed.
        tokenless_env = dict(os.environ)
        tokenless_env.pop(TOKEN_ENV, None)

        with mock.patch.dict(os.environ, tokenless_env, clear=True):
            with tempfile.TemporaryDirectory() as workdir:
                target = Path(workdir) / "tutorial.json"
                outcome = publish(graph, target, source_dir=Path(workdir))

                self.assertFalse(outcome["dispatched"])
                self.assertTrue(target.exists())

                written = json.loads(target.read_text())
                self.assertEqual(written["schema"], "generic@1")
                self.assertEqual(
                    written["metadata"]["tool"],
                    "pocketflow-tutorial-codebase-knowledge",
                )
71+
72+
73+
if __name__ == "__main__":
74+
unittest.main()

utils/uq_publish.py

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
"""Opt-in understand-quickly registry publish for PocketFlow-Tutorial-Codebase-Knowledge.
2+
3+
Emits a small `generic@1` knowledge-graph projection of the generated tutorial
4+
(nodes = abstractions, edges = abstraction relationships and chapter ordering) and, if a token is set,
5+
fires a `repository_dispatch` at the registry.
6+
7+
Stdlib-only — no new dependencies.
8+
9+
Spec: https://github.com/looptech-ai/understand-quickly/blob/main/docs/spec/code-graph-protocol.md
10+
"""
11+
from __future__ import annotations
12+
13+
import datetime as _dt
14+
import json
15+
import os
16+
import subprocess # nosec B404 — fixed argv, no shell
17+
import sys
18+
import urllib.error
19+
import urllib.request
20+
from pathlib import Path
21+
from typing import Any
22+
23+
# Identity of this tool; written into graph metadata, the dispatch payload
# and the User-Agent header.
TOOL_NAME = "pocketflow-tutorial-codebase-knowledge"
TOOL_VERSION = "0.1.0"

# GitHub repository that receives the `repository_dispatch` event, and the
# event type sent with it.
REGISTRY_REPO = "looptech-ai/understand-quickly"
DISPATCH_EVENT_TYPE = "uq-publish"

# Environment variable holding the token; dispatch is skipped when it is unset.
TOKEN_ENV = "UNDERSTAND_QUICKLY_TOKEN"
28+
29+
30+
def _git(args: list[str], cwd: Path) -> str | None:
    """Run ``git <args>`` in ``cwd`` and return its stripped stdout.

    This is a best-effort helper: it returns ``None`` instead of raising when
    git exits non-zero, when the process cannot be started (git missing,
    ``cwd`` missing / not a directory / not accessible), or when the command
    exceeds the 5 second timeout.

    Args:
        args: Arguments passed to git after the executable name.
        cwd: Directory in which git is run.

    Returns:
        The command's stdout with surrounding whitespace removed, or ``None``
        on any failure.
    """
    try:
        r = subprocess.run(  # nosec B603
            ["git", *args], cwd=str(cwd), capture_output=True, text=True,
            check=False, timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers FileNotFoundError (no git / no cwd) as well as
        # NotADirectoryError and PermissionError raised while spawning;
        # SubprocessError covers TimeoutExpired.
        return None
    return r.stdout.strip() if r.returncode == 0 else None
39+
40+
41+
def _git_head(repo_dir: Path) -> str | None:
    """Return the HEAD commit of ``repo_dir`` if it is a 40-character id.

    Yields ``None`` when ``git rev-parse HEAD`` produces nothing usable or
    the output is not exactly 40 characters long.
    """
    head = _git(["rev-parse", "HEAD"], repo_dir)
    if not head or len(head) != 40:
        return None
    return head
44+
45+
46+
def _slug_from_github_url(url: str) -> str | None:
    """Extract ``owner/repo`` from a GitHub URL, or ``None`` if not possible."""
    prefixes = (
        "https://github.com/",
        "http://github.com/",
        "ssh://git@github.com/",
        "git@github.com:",
    )
    cleaned = url.strip()
    for prefix in prefixes:
        if not cleaned.startswith(prefix):
            continue
        # Keep only the first two path segments so that trailing slashes and
        # browse URLs (.../owner/repo/tree/main/src) do not leak into the slug.
        segments = [part for part in cleaned[len(prefix):].split("/") if part]
        if len(segments) < 2:
            return None
        owner, repo = segments[0], segments[1].removesuffix(".git")
        return f"{owner}/{repo}" if repo else None
    return None


def _detect_repo_slug(repo_dir: Path, repo_url: str | None = None) -> str | None:
    """Best-effort `owner/repo` slug — honours `repo_url` first (PocketFlow
    typically tutorialises a remote repo, not the cwd).

    Args:
        repo_dir: Directory whose git ``origin`` remote is used as a fallback.
        repo_url: Optional repository URL; tried before the git remote.

    Returns:
        The ``owner/repo`` slug of the first candidate that is a GitHub URL,
        or ``None`` when neither candidate yields one.
    """
    if repo_url:
        slug = _slug_from_github_url(repo_url)
        if slug:
            return slug
    # Only shell out to git when the explicit URL did not resolve.
    origin = _git(["remote", "get-url", "origin"], repo_dir)
    return _slug_from_github_url(origin) if origin else None
62+
63+
64+
def build_generic_graph(
    *,
    project_name: str,
    abstractions: list[dict],
    chapter_order: list[int],
    relationships: dict,
    repo_url: str | None,
    source_dir: Path | None,
) -> dict:
    """Project the tutorial onto a `generic@1` node/edge graph.

    Each abstraction becomes a node (kind=abstraction) with id ``A<index>``.
    Edges come from two sources: every entry of ``relationships["details"]``
    (kind=relationship) and every consecutive pair in ``chapter_order``
    (kind=next_chapter).

    Args:
        project_name: Stored as ``metadata["project_name"]``.
        abstractions: Dicts read for ``name``, ``description`` and ``files``;
            missing keys fall back to a placeholder label, ``""`` and ``[]``.
        chapter_order: Abstraction indices in chapter sequence. A node's
            ``chapter_index`` is the first position of its index in this
            list, or ``None`` if the index does not appear.
        relationships: Dict read for ``summary`` and ``details``; may be
            empty or ``None``. Each detail must carry ``from`` and ``to``.
        repo_url: Added to the metadata when truthy.
        source_dir: When given, its git HEAD commit is added to the metadata
            if one can be determined.

    Returns:
        A dict with keys ``schema``, ``metadata``, ``nodes`` and ``edges``.

    Raises:
        KeyError: If a relationship detail lacks ``from`` or ``to``.
    """
    # Resolve chapter positions once rather than scanning `chapter_order`
    # for every node; `setdefault` keeps the first occurrence, matching
    # `list.index`.
    chapter_position: dict[int, int] = {}
    for position, abstraction_index in enumerate(chapter_order):
        chapter_position.setdefault(abstraction_index, position)

    nodes: list[dict] = [
        {
            "id": f"A{i}",
            "label": abstr.get("name", f"abstraction {i}"),
            "kind": "abstraction",
            "description": abstr.get("description", ""),
            "files": list(abstr.get("files", [])),
            "chapter_index": chapter_position.get(i),
        }
        for i, abstr in enumerate(abstractions)
    ]

    # `or []` tolerates an explicit `"details": None` as well as a missing key.
    details = (relationships or {}).get("details") or []
    edges: list[dict] = [
        {
            "source": f"A{rel['from']}",
            "target": f"A{rel['to']}",
            "label": rel.get("label", ""),
            "kind": "relationship",
        }
        for rel in details
    ]
    # Chapter-order edges (A_i -> A_{i+1}) for prerequisite-style traversal.
    edges.extend(
        {"source": f"A{prev}", "target": f"A{curr}", "kind": "next_chapter"}
        for prev, curr in zip(chapter_order, chapter_order[1:])
    )

    commit = _git_head(source_dir) if source_dir else None
    metadata: dict[str, Any] = {
        "tool": TOOL_NAME,
        "tool_version": TOOL_VERSION,
        # UTC timestamp with an explicit trailing "Z".
        "generated_at": _dt.datetime.now(_dt.timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        ),
        "project_name": project_name,
        "summary": (relationships or {}).get("summary", ""),
    }
    # Optional fields are omitted entirely rather than written as null.
    if commit:
        metadata["commit"] = commit
    if repo_url:
        metadata["repo_url"] = repo_url
    return {
        "schema": "generic@1",
        "metadata": metadata,
        "nodes": nodes,
        "edges": edges,
    }
124+
125+
126+
def write_graph(graph: dict, output_path: Path) -> Path:
    """Serialise ``graph`` as indented JSON at ``output_path``.

    Missing parent directories are created first. The file is written as
    UTF-8 and the same ``output_path`` is returned.
    """
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(graph, indent=2)
    with output_path.open("w", encoding="utf-8") as handle:
        handle.write(serialized)
    return output_path
130+
131+
132+
def dispatch(repo_slug: str, *, token: str, schema: str, graph_path: str,
             commit: str | None = None, timeout: float = 10.0) -> int:
    """POST a dispatch event for ``repo_slug`` to the registry repository.

    The request goes to the GitHub ``/repos/<REGISTRY_REPO>/dispatches``
    endpoint with ``token`` as a bearer credential. Errors raised by
    ``urllib.request.urlopen`` are not caught here.

    Args:
        repo_slug: Sent as ``client_payload["repo"]``.
        token: Bearer token placed in the ``Authorization`` header.
        schema: Sent as ``client_payload["schema"]``.
        graph_path: Sent as ``client_payload["graph_path"]``.
        commit: Included in the payload only when truthy.
        timeout: Timeout passed to ``urlopen``.

    Returns:
        The HTTP status of the response.
    """
    client_payload = {
        "repo": repo_slug,
        "schema": schema,
        "graph_path": graph_path,
        "tool": TOOL_NAME,
        "tool_version": TOOL_VERSION,
    }
    if commit:
        client_payload["commit"] = commit

    body = json.dumps(
        {"event_type": DISPATCH_EVENT_TYPE, "client_payload": client_payload}
    ).encode("utf-8")
    endpoint = f"https://api.github.com/repos/{REGISTRY_REPO}/dispatches"
    request_headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": f"{TOOL_NAME}/{TOOL_VERSION}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    request = urllib.request.Request(  # nosec B310 — fixed https URL
        endpoint, data=body, headers=request_headers, method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:  # nosec B310
        return response.status
156+
157+
158+
def publish(
    graph: dict,
    output_path: Path,
    *,
    repo_url: str | None = None,
    source_dir: Path | None = None,
    token_env: str = TOKEN_ENV,
    log: Any = None,
) -> dict[str, Any]:
    """Write the graph and (if token set) dispatch. Never raises on network errors.

    Args:
        graph: Graph dict to serialise; its ``metadata`` is echoed in the result.
        output_path: Destination of the JSON file.
        repo_url: Preferred source for the ``owner/repo`` slug.
        source_dir: Directory used for git-based slug detection; falls back
            to the current working directory.
        token_env: Name of the environment variable holding the token.
        log: Stream for status messages; defaults to ``sys.stderr``.

    Returns:
        A dict with ``dispatched`` and ``metadata``, plus ``status`` on
        success, ``registered: False`` on HTTP 404, or ``error`` on other
        dispatch failures.
    """
    stream = log or sys.stderr
    write_graph(graph, output_path)
    metadata = graph.get("metadata", {})

    def _not_dispatched(**extra: Any) -> dict[str, Any]:
        # Common shape of every "local file only" outcome.
        return {"dispatched": False, "metadata": metadata, **extra}

    token = os.environ.get(token_env, "").strip()
    if not token:
        print(
            f"[uq-publish] wrote {output_path}; ${token_env} unset — "
            f"skipping registry dispatch (see "
            f"https://github.com/looptech-ai/uq-publish-action for CI use).",
            file=stream,
        )
        return _not_dispatched()

    repo_slug = _detect_repo_slug(source_dir or Path.cwd(), repo_url)
    if not repo_slug:
        print(
            "[uq-publish] could not detect github repo slug — skipping dispatch.",
            file=stream,
        )
        return _not_dispatched()

    try:
        status = dispatch(
            repo_slug,
            token=token,
            schema=graph.get("schema", "generic@1"),
            graph_path=str(output_path),
            commit=metadata.get("commit"),
        )
    except urllib.error.HTTPError as exc:
        if exc.code != 404:
            print(
                f"[uq-publish] dispatch failed ({exc.code}); local file written.",
                file=stream,
            )
            return _not_dispatched(error=str(exc))
        print(
            f"[uq-publish] {repo_slug} not in registry — register once with: "
            "npx @understand-quickly/cli add",
            file=stream,
        )
        return _not_dispatched(registered=False)
    except (urllib.error.URLError, OSError) as exc:
        print(f"[uq-publish] dispatch failed ({exc}); local file written.", file=stream)
        return _not_dispatched(error=str(exc))
    else:
        print(
            f"[uq-publish] dispatched to {REGISTRY_REPO} (HTTP {status}) for "
            f"{repo_slug}.",
            file=stream,
        )
        return {"dispatched": True, "metadata": metadata, "status": status}

0 commit comments

Comments
 (0)