Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,26 @@ To run this project in a Docker container, you'll need to pass your API keys as
```
</details>

## 📡 Publish to understand-quickly (opt-in)

Add `--publish` to land the generated tutorial in [`looptech-ai/understand-quickly`](https://github.com/looptech-ai/understand-quickly), a public registry of code-knowledge graphs that ships an MCP server. The flag writes a small `generic@1` JSON projection of the tutorial (chapters as nodes; relationships and chapter order as edges) to `<output>/<project>/tutorial.json`, with `metadata.{tool, tool_version, generated_at}` plus `commit` when the source directory is a Git checkout. If `UNDERSTAND_QUICKLY_TOKEN` is set and the GitHub `owner/repo` can be determined, it also fires a `repository_dispatch` so the registry resyncs the entry.

```bash
python main.py --repo https://github.com/example/demo --publish
```

Without the token, only the local file is written. The drop-in CI step is the [`looptech-ai/uq-publish-action`](https://github.com/looptech-ai/uq-publish-action):

```yaml
- uses: looptech-ai/uq-publish-action@v0.1.0
  with:
    graph-path: 'output/<project>/tutorial.json'
    format: 'generic@1'
    token: ${{ secrets.UNDERSTAND_QUICKLY_TOKEN }}
```

Submitting via `--publish` is governed by the [Understand-Quickly Data License 1.0](https://github.com/looptech-ai/understand-quickly/blob/main/DATA-LICENSE.md). It is opt-in.

## 💡 Development Tutorial

- I built using [**Agentic Coding**](https://zacharyhuang.substack.com/p/agentic-coding-the-most-fun-way-to), the fastest development paradigm, where humans simply [design](docs/design.md) and agents [code](flow.py).
Expand Down
12 changes: 12 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@ def main():
parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
# Add max_abstraction_num parameter to control the number of abstractions
parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
# Opt-in publish to the understand-quickly registry of code-knowledge graphs.
# https://github.com/looptech-ai/understand-quickly
parser.add_argument(
"--publish",
action="store_true",
help="Emit a generic@1 knowledge-graph projection of the tutorial and (if "
"UNDERSTAND_QUICKLY_TOKEN is set) dispatch it to the understand-quickly "
"registry. Opt-in; default behavior is unchanged.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -88,6 +97,9 @@ def main():
# Add max_abstraction_num parameter
"max_abstraction_num": args.max_abstractions,

# Opt-in publish to understand-quickly (looptech-ai/understand-quickly).
"publish_to_uq": args.publish,

# Outputs will be populated by the nodes
"files": [],
"abstractions": [],
Expand Down
27 changes: 27 additions & 0 deletions nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -878,3 +878,30 @@ def exec(self, prep_res):
def post(self, shared, prep_res, exec_res):
    """Record the tutorial output directory and optionally publish a graph.

    Stores ``exec_res`` (the output path) under ``shared["final_output_dir"]``
    and announces completion. When ``shared["publish_to_uq"]`` is truthy, it
    additionally builds a ``generic@1`` projection of the tutorial and hands
    it to ``utils.uq_publish.publish``, which writes ``tutorial.json`` inside
    the output directory. ``prep_res`` is not used.
    """
    shared["final_output_dir"] = exec_res  # Store the output path
    print(f"\nTutorial generation complete! Files are in: {exec_res}")

    # Publishing is strictly opt-in; without the flag there is nothing more to do.
    if not shared.get("publish_to_uq"):
        return

    # The markdown output is already on disk, so any failure below (including
    # a failed import of the helper module) is reported as a warning only and
    # must never affect tutorial generation.
    try:
        from pathlib import Path
        from utils.uq_publish import build_generic_graph, publish

        local_dir = shared.get("local_dir")
        checkout = Path(local_dir).resolve() if local_dir else None

        projection = build_generic_graph(
            project_name=shared["project_name"],
            abstractions=shared.get("abstractions", []),
            chapter_order=shared.get("chapter_order", []),
            relationships=shared.get("relationships", {}),
            repo_url=shared.get("repo_url"),
            source_dir=checkout,
        )
        destination = Path(exec_res) / "tutorial.json"
        publish(
            projection,
            destination,
            repo_url=shared.get("repo_url"),
            source_dir=checkout,
        )
    except Exception as err:
        print(f"[uq-publish] warning: {err}")
74 changes: 74 additions & 0 deletions tests/test_uq_publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Tests for utils/uq_publish.py — opt-in understand-quickly publish."""
from __future__ import annotations

import json
import os
import sys
import unittest
from pathlib import Path
from unittest import mock

# Add project root to path so `from utils.uq_publish import ...` works
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from utils.uq_publish import build_generic_graph, publish, TOKEN_ENV # noqa: E402


# Two minimal abstractions; each one becomes a node in the projected graph.
SAMPLE_ABSTRACTIONS = [
    {
        "name": "Flow",
        "description": "Pipeline orchestrator",
        "files": ["flow.py"],
    },
    {
        "name": "Node",
        "description": "Unit of work",
        "files": ["nodes.py"],
    },
]

# One relationship between the two abstractions above ("from"/"to" are
# indexes into SAMPLE_ABSTRACTIONS), plus the project summary text.
SAMPLE_RELATIONSHIPS = {
    "summary": "PocketFlow runs nodes in a flow.",
    "details": [
        {"from": 0, "to": 1, "label": "contains"},
    ],
}


class BuildGenericGraphTests(unittest.TestCase):
    """Shape checks for the graph returned by ``build_generic_graph``."""

    def test_emits_generic_at_1_with_metadata(self) -> None:
        """The sample tutorial projects to two nodes, two edges and tool metadata."""
        projection = build_generic_graph(
            project_name="demo",
            abstractions=SAMPLE_ABSTRACTIONS,
            chapter_order=[0, 1],
            relationships=SAMPLE_RELATIONSHIPS,
            repo_url="https://github.com/example/demo",
            source_dir=None,
        )

        self.assertEqual(projection["schema"], "generic@1")

        meta = projection["metadata"]
        self.assertEqual(meta["tool"], "pocketflow-tutorial-codebase-knowledge")
        self.assertEqual(meta["project_name"], "demo")
        # The timestamp is expected in UTC with a trailing "Z".
        self.assertTrue(meta["generated_at"].endswith("Z"))

        # Two abstractions -> two nodes; one relationship + one chapter-order edge.
        node_list = projection["nodes"]
        edge_list = projection["edges"]
        self.assertEqual(len(node_list), 2)
        self.assertEqual(len(edge_list), 2)
        edge_kinds = sorted(edge["kind"] for edge in edge_list)
        self.assertEqual(edge_kinds, ["next_chapter", "relationship"])


class PublishTests(unittest.TestCase):
    """Behaviour of ``publish`` when no registry token is configured."""

    def test_no_token_writes_file_and_skips_dispatch(self) -> None:
        """Without the token the JSON file is written and nothing is dispatched."""
        import tempfile

        projection = build_generic_graph(
            project_name="demo",
            abstractions=SAMPLE_ABSTRACTIONS,
            chapter_order=[0, 1],
            relationships=SAMPLE_RELATIONSHIPS,
            repo_url=None,
            source_dir=None,
        )

        # Copy of the current environment with the token variable removed, so
        # the test behaves the same whether or not the developer has it set.
        tokenless_env = dict(os.environ)
        tokenless_env.pop(TOKEN_ENV, None)

        with mock.patch.dict(os.environ, tokenless_env, clear=True):
            with tempfile.TemporaryDirectory() as workdir:
                target = Path(workdir) / "tutorial.json"
                outcome = publish(projection, target, source_dir=Path(workdir))

                self.assertFalse(outcome["dispatched"])
                self.assertTrue(target.exists())

                written = json.loads(target.read_text())
                self.assertEqual(written["schema"], "generic@1")
                self.assertEqual(written["metadata"]["tool"],
                                 "pocketflow-tutorial-codebase-knowledge")


# Allow running this test module directly as a script, in addition to
# discovery by a test runner.
if __name__ == "__main__":
    unittest.main()
207 changes: 207 additions & 0 deletions utils/uq_publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
"""Opt-in understand-quickly registry publish for PocketFlow-Tutorial-Codebase-Knowledge.

Emits a small `generic@1` knowledge-graph projection of the generated tutorial
(nodes = chapters, edges = chapter relationships) and, if a token is set,
fires a `repository_dispatch` at the registry.

Stdlib-only — no new dependencies.

Spec: https://github.com/looptech-ai/understand-quickly/blob/main/docs/spec/code-graph-protocol.md
"""
from __future__ import annotations

import datetime as _dt
import json
import os
import subprocess # nosec B404 — fixed argv, no shell
import sys
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any

# Identifier of this tool; written to the graph's metadata["tool"], sent in
# the dispatch payload, and used in the User-Agent header.
TOOL_NAME = "pocketflow-tutorial-codebase-knowledge"
# Version reported next to TOOL_NAME (metadata["tool_version"], payload, User-Agent).
TOOL_VERSION = "0.1.0"
# GitHub `owner/repo` whose /dispatches endpoint dispatch() POSTs to.
REGISTRY_REPO = "looptech-ai/understand-quickly"
# Default name of the environment variable publish() reads the token from.
TOKEN_ENV = "UNDERSTAND_QUICKLY_TOKEN"
# `event_type` value placed in the repository_dispatch payload.
DISPATCH_EVENT_TYPE = "uq-publish"


def _git(args: list[str], cwd: Path) -> str | None:
try:
r = subprocess.run( # nosec B603
["git", *args], cwd=str(cwd), capture_output=True, text=True,
check=False, timeout=5,
)
except (FileNotFoundError, subprocess.SubprocessError):
return None
return r.stdout.strip() if r.returncode == 0 else None


def _git_head(repo_dir: Path) -> str | None:
    """Return the HEAD commit id of *repo_dir*, or None if unavailable.

    The value is accepted only when ``git rev-parse HEAD`` succeeds and its
    output is exactly 40 characters long; anything else yields None.
    """
    head = _git(["rev-parse", "HEAD"], repo_dir)
    if not head or len(head) != 40:
        return None
    return head


def _detect_repo_slug(repo_dir: Path, repo_url: str | None = None) -> str | None:
"""Best-effort `owner/repo` slug — honours `repo_url` first (PocketFlow
typically tutorialises a remote repo, not the cwd)."""
candidates: list[str] = []
if repo_url:
candidates.append(repo_url)
origin = _git(["remote", "get-url", "origin"], repo_dir)
if origin:
candidates.append(origin)
for url in candidates:
for prefix in ("https://github.com/", "git@github.com:"):
if url.startswith(prefix):
slug = url[len(prefix):].removesuffix(".git")
if slug and "/" in slug:
return slug
return None


def build_generic_graph(
    *,
    project_name: str,
    abstractions: list[dict],
    chapter_order: list[int],
    relationships: dict,
    repo_url: str | None,
    source_dir: Path | None,
) -> dict:
    """Project the tutorial onto a `generic@1` node/edge graph.

    Each abstraction becomes a node (kind=abstraction) with id ``A<index>``.
    Edges come from two sources: every entry of ``relationships["details"]``
    (kind=relationship) and each consecutive pair in ``chapter_order``
    (kind=next_chapter).

    Args:
        project_name: Stored as ``metadata["project_name"]``.
        abstractions: Dicts read via ``name``, ``description`` and ``files``.
        chapter_order: Abstraction indexes in chapter order; a node's
            ``chapter_index`` is its first position here, or None if absent.
        relationships: Dict read via ``summary`` and ``details``; each detail
            must provide ``from`` and ``to`` and may provide ``label``.
        repo_url: Added to the metadata when truthy.
        source_dir: When given, its git HEAD (if resolvable) is added to the
            metadata as ``commit``.

    Returns:
        A dict with ``schema``, ``metadata``, ``nodes`` and ``edges`` keys.
        None for any of the three collection arguments is treated as empty.
    """
    abstractions = abstractions or []
    chapter_order = chapter_order or []
    relationships = relationships or {}

    # One pass over chapter_order instead of list.index() per node; setdefault
    # keeps the first occurrence, which is what list.index() would return.
    chapter_position: dict[int, int] = {}
    for position, abstraction_index in enumerate(chapter_order):
        chapter_position.setdefault(abstraction_index, position)

    nodes: list[dict] = [
        {
            "id": f"A{i}",
            "label": abstr.get("name", f"abstraction {i}"),
            "kind": "abstraction",
            "description": abstr.get("description", ""),
            "files": list(abstr.get("files", [])),
            "chapter_index": chapter_position.get(i),
        }
        for i, abstr in enumerate(abstractions)
    ]

    edges: list[dict] = [
        {
            "source": f"A{rel['from']}",
            "target": f"A{rel['to']}",
            "label": rel.get("label", ""),
            "kind": "relationship",
        }
        for rel in (relationships.get("details") or [])
    ]
    # Chapter-order edges (A_i -> A_{i+1}) for prerequisite-style traversal.
    edges.extend(
        {"source": f"A{prev}", "target": f"A{curr}", "kind": "next_chapter"}
        for prev, curr in zip(chapter_order, chapter_order[1:])
    )

    commit = _git_head(source_dir) if source_dir else None
    metadata: dict[str, Any] = {
        "tool": TOOL_NAME,
        "tool_version": TOOL_VERSION,
        "generated_at": _dt.datetime.now(_dt.timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        ),
        "project_name": project_name,
        "summary": relationships.get("summary", ""),
    }
    # Optional fields are omitted entirely rather than emitted as null.
    if commit:
        metadata["commit"] = commit
    if repo_url:
        metadata["repo_url"] = repo_url
    return {
        "schema": "generic@1",
        "metadata": metadata,
        "nodes": nodes,
        "edges": edges,
    }


def write_graph(graph: dict, output_path: Path) -> Path:
    """Serialise *graph* as indented JSON at *output_path* and return the path.

    Missing parent directories are created first. The file is written as
    UTF-8 with a two-space indent.
    """
    parent_dir = output_path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    # Serialise before opening the file so a failure here leaves no empty file.
    rendered = json.dumps(graph, indent=2)
    with output_path.open("w", encoding="utf-8") as handle:
        handle.write(rendered)
    return output_path


def dispatch(repo_slug: str, *, token: str, schema: str, graph_path: str,
             commit: str | None = None, timeout: float = 10.0) -> int:
    """POST a ``repository_dispatch`` event to the registry repository.

    Args:
        repo_slug: ``owner/repo`` of the project the graph describes.
        token: Bearer token sent in the ``Authorization`` header.
        schema: Graph schema identifier placed in the payload.
        graph_path: Graph file location placed in the payload.
        commit: Optional commit id; omitted from the payload when falsy.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The HTTP status of the response.

    Raises:
        Whatever ``urllib.request.urlopen`` raises; this function does not
        catch errors itself.
    """
    client_payload = {
        "repo": repo_slug,
        "schema": schema,
        "graph_path": graph_path,
        "tool": TOOL_NAME,
        "tool_version": TOOL_VERSION,
    }
    if commit:
        client_payload["commit"] = commit

    body = json.dumps({
        "event_type": DISPATCH_EVENT_TYPE,
        "client_payload": client_payload,
    }).encode("utf-8")

    endpoint = f"https://api.github.com/repos/{REGISTRY_REPO}/dispatches"
    request_headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": f"{TOOL_NAME}/{TOOL_VERSION}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    request = urllib.request.Request(  # nosec B310 — fixed https URL
        endpoint, data=body, headers=request_headers, method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:  # nosec B310
        return response.status


def publish(
    graph: dict,
    output_path: Path,
    *,
    repo_url: str | None = None,
    source_dir: Path | None = None,
    token_env: str = TOKEN_ENV,
    log: Any = None,
) -> dict[str, Any]:
    """Write the graph and (if token set) dispatch. Never raises on network errors.

    Args:
        graph: Graph dict; its ``metadata`` and ``schema`` entries are read.
        output_path: Destination of the JSON file (always written).
        repo_url: Preferred source for the GitHub ``owner/repo`` slug.
        source_dir: Directory used for slug detection; defaults to the cwd.
        token_env: Name of the environment variable holding the token.
        log: File-like object for status messages; defaults to ``sys.stderr``.

    Returns:
        A dict with ``dispatched`` (bool) and ``metadata``; plus ``status`` on
        success, ``registered: False`` on HTTP 404, or ``error`` on other
        dispatch failures.
    """
    stream = log or sys.stderr

    def say(message: str) -> None:
        # Status output goes to the chosen stream, never to stdout.
        print(message, file=stream)

    write_graph(graph, output_path)
    metadata = graph.get("metadata", {})

    def not_dispatched(**extra: Any) -> dict[str, Any]:
        # Common result shape for every path that did not reach the registry.
        return {"dispatched": False, "metadata": metadata, **extra}

    token = os.environ.get(token_env, "").strip()
    if not token:
        say(
            f"[uq-publish] wrote {output_path}; ${token_env} unset — "
            f"skipping registry dispatch (see "
            f"https://github.com/looptech-ai/uq-publish-action for CI use)."
        )
        return not_dispatched()

    search_dir = source_dir or Path.cwd()
    repo_slug = _detect_repo_slug(search_dir, repo_url)
    if not repo_slug:
        say("[uq-publish] could not detect github repo slug — skipping dispatch.")
        return not_dispatched()

    try:
        status = dispatch(
            repo_slug,
            token=token,
            schema=graph.get("schema", "generic@1"),
            graph_path=str(output_path),
            commit=metadata.get("commit"),
        )
    except urllib.error.HTTPError as exc:
        # 404 is reported separately: the repo has no registry entry yet.
        if exc.code == 404:
            say(
                f"[uq-publish] {repo_slug} not in registry — register once with: "
                "npx @understand-quickly/cli add"
            )
            return not_dispatched(registered=False)
        say(f"[uq-publish] dispatch failed ({exc.code}); local file written.")
        return not_dispatched(error=str(exc))
    except (urllib.error.URLError, OSError) as exc:
        say(f"[uq-publish] dispatch failed ({exc}); local file written.")
        return not_dispatched(error=str(exc))
    else:
        say(
            f"[uq-publish] dispatched to {REGISTRY_REPO} (HTTP {status}) for "
            f"{repo_slug}."
        )
        return {"dispatched": True, "metadata": metadata, "status": status}
Loading