From 7a4b08bd64feaed13e29bc5bec2f2dd723f8c486 Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 21:40:21 -0500 Subject: [PATCH 1/7] fix(ci): improve auto bm note narrative Signed-off-by: phernandez --- .github/basic-memory/memory-ci-capture.md | 38 ++- .github/workflows/basic-memory.yml | 2 + src/basic_memory/ci/README.md | 25 +- src/basic_memory/ci/project_updates.py | 288 ++++++++++++++++++++-- tests/ci/test_project_updates.py | 188 ++++++++++++-- tests/cli/test_ci_commands.py | 48 ++-- 6 files changed, 526 insertions(+), 63 deletions(-) diff --git a/.github/basic-memory/memory-ci-capture.md b/.github/basic-memory/memory-ci-capture.md index 7f3cde32..bffa3818 100644 --- a/.github/basic-memory/memory-ci-capture.md +++ b/.github/basic-memory/memory-ci-capture.md @@ -1,21 +1,45 @@ # Memory CI Capture -You turn GitHub delivery context into a concise project update synthesis for -Basic Memory. GitHub records the mechanics. Basic Memory remembers what changed -and why. +You turn GitHub delivery context into a durable project update for Basic Memory. +GitHub records the mechanics. Basic Memory remembers what changed and why. ## Inputs - Read `.github/basic-memory/project-update-context.json`. +- Read the PR diff before writing when a SHA is available. Useful commands: + `git show --stat --name-only <sha>` and `git show --format=fuller --no-patch <sha>`. +- Use linked issue details, changed files, commit messages, PR body, labels, and + source links as evidence. - Treat GitHub payload fields as immutable facts. - Do not invent tests, deployment status, issues, or user impact. +## Writing Standard + +Do not write a fill-in-the-blanks note. Tell the story from the PR: +problem -> solution -> impact. + +Explain what problem was being addressed. If linked issue details are present, +use them. If they are absent, ground the problem in the PR body, title, commits, +and diff, and say when the original problem statement is unavailable. 
+ +Explain why the fix solves the problem, what complexity it introduced, what it +refactored or removed, which components changed, and how the system is different +after the merge. Prefer specific component names, file paths, modules, commands, +and behavior over generic phrases. + ## Output Return only JSON that matches the provided AgentSynthesis schema: -- `summary`: what changed. -- `why_it_matters`: why this project update matters for future humans and agents. +- `summary`: one concise sentence; do not merely repeat the PR title. +- `story`: 2-4 sentences that connect problem -> solution -> impact. +- `problem_addressed`: the concrete problem, bug, missing capability, or delivery need. +- `solution`: why this change solves the problem. +- `system_impact`: how the system, workflow, or architecture changed after the merge. +- `why_it_matters`: durable project-memory context for future humans and agents. +- `components_changed`: modules, workflows, commands, schemas, docs, or services touched. +- `complexity_introduced`: tradeoffs, new moving parts, operational costs, or edge cases. +- `refactors_or_removals`: cleanup, simplification, deleted paths, or "none found". - `user_facing_changes`: visible behavior or product changes. - `internal_changes`: implementation, infrastructure, or operational changes. - `verification`: checks, tests, deploy evidence, or explicit unknowns. @@ -23,5 +47,5 @@ Return only JSON that matches the provided AgentSynthesis schema: - `decision_candidates`: explicit product or architecture decisions only. - `task_candidates`: concrete future tasks only. -Prefer source links and grounded phrasing. This is project memory, not marketing -copy and not a commit-by-commit changelog. +Use empty arrays only when a list truly has no grounded entries. This is project +memory, not marketing copy and not a commit-by-commit changelog. 
diff --git a/.github/workflows/basic-memory.yml b/.github/workflows/basic-memory.yml index 717fae96..04c39bed 100644 --- a/.github/workflows/basic-memory.yml +++ b/.github/workflows/basic-memory.yml @@ -31,6 +31,8 @@ jobs: - name: Collect project update context id: collect + env: + GITHUB_TOKEN: ${{ github.token }} run: | bm ci collect \ --config .github/basic-memory/config.yml \ diff --git a/src/basic_memory/ci/README.md b/src/basic_memory/ci/README.md index dc2beb56..4e815d5e 100644 --- a/src/basic_memory/ci/README.md +++ b/src/basic_memory/ci/README.md @@ -4,9 +4,11 @@ Basic Memory CI turns meaningful GitHub delivery moments into durable `project_update` notes in Basic Memory. GitHub records the mechanics: pull requests, workflow runs, SHAs, URLs, labels, -and timestamps. The agent reads those facts and writes a short synthesis of what -changed and why it matters. The Basic Memory CLI owns authentication, schema -guidance, idempotency, and publishing. +changed files, commits, linked issues, and timestamps. The agent reads those +facts and writes the delivery story: what problem was being addressed, why the +fix solved it, what changed in the system, what complexity or cleanup came with +it, and why future humans or agents should care. The Basic Memory CLI owns +authentication, schema guidance, idempotency, and publishing. The product voice is: @@ -127,15 +129,22 @@ Reads the current GitHub event payload and normalizes it into `ProjectUpdateContext`. This command decides whether the event is eligible. Merged pull requests and configured successful production deploy workflow runs are eligible. Routine CI runs, failed deploys, and unmerged PR closures are -no-ops. In v0, collection is intentionally limited to the GitHub event payload; -GitHub API enrichment for file lists, checks, reviews, or commit lists can be -added later without changing the publishing boundary. +no-ops. 
+ +For merged pull requests, the generated workflow passes `GITHUB_TOKEN` to +`bm ci collect` so the context can include changed files, commit messages, and +linked issue details. If `GITHUB_TOKEN` is unavailable, local collection still +uses the event payload fields. If the token is present and GitHub API enrichment +fails, the Auto BM workflow fails fast instead of publishing a weak note. `bm ci agent-schema` Writes the optional `AgentSynthesis` JSON schema used by the generated workflow as a CI guardrail. This schema is not a Basic Memory domain schema and is not -committed by setup. +committed by setup. The schema intentionally requires narrative fields such as +`story`, `problem_addressed`, `solution`, `system_impact`, +`components_changed`, `complexity_introduced`, and `refactors_or_removals` so +the agent does more than fill out shallow buckets. `bm ci publish` @@ -180,7 +189,7 @@ project-updates/github/// - `ProjectUpdateConfig`: non-secret repo configuration. - `ProjectUpdateContext`: normalized immutable GitHub facts. -- `AgentSynthesis`: agent-authored summary fields. +- `AgentSynthesis`: agent-authored narrative fields. - `ProjectUpdateNote`: final Basic Memory note payload. - workflow, prompt, and schema-note seed rendering. 
diff --git a/src/basic_memory/ci/project_updates.py b/src/basic_memory/ci/project_updates.py index ad5cd9dd..2e87d744 100644 --- a/src/basic_memory/ci/project_updates.py +++ b/src/basic_memory/ci/project_updates.py @@ -7,8 +7,11 @@ from __future__ import annotations import json +import os import re import subprocess +import urllib.error +import urllib.request from pathlib import Path from typing import Any, Literal @@ -46,6 +49,34 @@ def _non_empty_list(cls, value: list[str]) -> list[str]: return cleaned +class ChangedFile(BaseModel): + """A GitHub pull request file summary.""" + + filename: str + status: str | None = None + additions: int | None = None + deletions: int | None = None + changes: int | None = None + + +class CommitSummary(BaseModel): + """A compact GitHub pull request commit summary.""" + + sha: str | None = None + message: str | None = None + author: str | None = None + + +class LinkedIssueDetail(BaseModel): + """GitHub issue context referenced by a pull request.""" + + number: int + title: str | None = None + body_excerpt: str | None = None + state: str | None = None + url: str | None = None + + class ProjectUpdateContext(BaseModel): """Normalized facts collected from a GitHub event payload.""" @@ -67,7 +98,10 @@ class ProjectUpdateContext(BaseModel): author: str | None = None labels: list[str] = Field(default_factory=list) linked_issues: list[str] = Field(default_factory=list) + linked_issue_details: list[LinkedIssueDetail] = Field(default_factory=list) + changed_files: list[ChangedFile] = Field(default_factory=list) changed_files_count: int | None = None + commits: list[CommitSummary] = Field(default_factory=list) class AgentSynthesis(BaseModel): @@ -76,7 +110,14 @@ class AgentSynthesis(BaseModel): model_config = ConfigDict(extra="ignore") summary: str + story: str + problem_addressed: str + solution: str + system_impact: str why_it_matters: str + components_changed: list[str] = Field(default_factory=list) + complexity_introduced: list[str] = 
Field(default_factory=list) + refactors_or_removals: list[str] = Field(default_factory=list) user_facing_changes: list[str] = Field(default_factory=list) internal_changes: list[str] = Field(default_factory=list) verification: list[str] = Field(default_factory=list) @@ -86,6 +127,10 @@ class AgentSynthesis(BaseModel): @field_validator( "summary", + "story", + "problem_addressed", + "solution", + "system_impact", "why_it_matters", ) @classmethod @@ -226,6 +271,148 @@ def _linked_issues(*texts: str | None) -> list[str]: return issues +def _github_api_get(path: str, token: str) -> list[Any] | dict[str, Any]: + request = urllib.request.Request( + f"https://api.github.com{path}", + headers={ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "User-Agent": "basic-memory-ci", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) + try: + with urllib.request.urlopen(request, timeout=20) as response: + payload = json.loads(response.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + raise ValueError(f"GitHub API request failed ({exc.code}) for {path}: {body}") from exc + except urllib.error.URLError as exc: + raise ValueError(f"GitHub API request failed for {path}: {exc.reason}") from exc + if not isinstance(payload, (list, dict)): + raise ValueError(f"GitHub API response for {path} must be a JSON object or array") + return payload + + +def _github_api_get_list(path: str, token: str) -> list[Any]: + items: list[Any] = [] + page = 1 + while True: + separator = "&" if "?" in path else "?" 
+ payload = _github_api_get(f"{path}{separator}per_page=100&page={page}", token) + if not isinstance(payload, list): + raise ValueError(f"GitHub API response for {path} must be a JSON array") + items.extend(payload) + if len(payload) < 100: + return items + page += 1 + + +def _text_or_none(value: Any) -> str | None: + return value if isinstance(value, str) else None + + +def _int_or_none(value: Any) -> int | None: + return value if isinstance(value, int) else None + + +def _body_excerpt(value: Any, *, limit: int = 2000) -> str | None: + if not isinstance(value, str): + return None + stripped = value.strip() + if len(stripped) <= limit: + return stripped + return stripped[: limit - 15].rstrip() + "... [truncated]" + + +def _changed_file_from_github(raw: Any) -> ChangedFile | None: + if not isinstance(raw, dict) or not isinstance(raw.get("filename"), str): + return None + return ChangedFile( + filename=raw["filename"], + status=_text_or_none(raw.get("status")), + additions=_int_or_none(raw.get("additions")), + deletions=_int_or_none(raw.get("deletions")), + changes=_int_or_none(raw.get("changes")), + ) + + +def _commit_summary_from_github(raw: Any) -> CommitSummary | None: + if not isinstance(raw, dict): + return None + commit = raw.get("commit") + if not isinstance(commit, dict): + return None + author = commit.get("author") + return CommitSummary( + sha=_text_or_none(raw.get("sha")), + message=_text_or_none(commit.get("message")), + author=_text_or_none(author.get("name")) if isinstance(author, dict) else None, + ) + + +def _issue_number(issue: str) -> int | None: + match = re.fullmatch(r"#(?P<number>\d+)", issue) + return int(match.group("number")) if match else None + + +def _issue_detail_from_github(raw: Any) -> LinkedIssueDetail | None: + if not isinstance(raw, dict) or not isinstance(raw.get("number"), int): + return None + return LinkedIssueDetail( + number=raw["number"], + title=_text_or_none(raw.get("title")), + body_excerpt=_body_excerpt(raw.get("body")), 
state=_text_or_none(raw.get("state")), + url=_text_or_none(raw.get("html_url")), + ) + + +def _enrich_pull_request_context(context: ProjectUpdateContext) -> ProjectUpdateContext: + token = os.environ.get("GITHUB_TOKEN") + if not token or not context.repo or context.pr_number is None: + return context + + files = [ + file + for file in ( + _changed_file_from_github(raw) + for raw in _github_api_get_list( + f"/repos/{context.repo}/pulls/{context.pr_number}/files", token + ) + ) + if file is not None + ] + commits = [ + commit + for commit in ( + _commit_summary_from_github(raw) + for raw in _github_api_get_list( + f"/repos/{context.repo}/pulls/{context.pr_number}/commits", token + ) + ) + if commit is not None + ] + issue_details: list[LinkedIssueDetail] = [] + for issue in context.linked_issues: + number = _issue_number(issue) + if number is None: + continue + detail = _issue_detail_from_github( + _github_api_get(f"/repos/{context.repo}/issues/{number}", token) + ) + if detail is not None: + issue_details.append(detail) + + return context.model_copy( + update={ + "changed_files": files, + "commits": commits, + "linked_issue_details": issue_details, + } + ) + + def _collect_pull_request_context(payload: dict[str, Any]) -> ProjectUpdateContext: pr = payload.get("pull_request") if not isinstance(pr, dict): @@ -258,7 +445,7 @@ def _collect_pull_request_context(payload: dict[str, Any]) -> ProjectUpdateConte if repo and isinstance(number, int): idempotency_key = f"github:{repo}:{PULL_REQUEST_MERGED}:{number}" - return ProjectUpdateContext( + context = ProjectUpdateContext( eligible=True, source_event=PULL_REQUEST_MERGED, repo=repo, @@ -277,6 +464,7 @@ def _collect_pull_request_context(payload: dict[str, Any]) -> ProjectUpdateConte pr["changed_files"] if isinstance(pr.get("changed_files"), int) else None ), ) + return _enrich_pull_request_context(context) def _collect_workflow_run_context( @@ -386,10 +574,21 @@ def build_project_update_note( f"# {_note_title(context)}", 
"## Summary", synthesis.summary, - "## Why It Matters", + "## Story", + synthesis.story, + "## Problem Addressed", + synthesis.problem_addressed, + "## How The Change Solves It", + synthesis.solution, + "## Impact On The System", + synthesis.system_impact, + "## Project Memory", synthesis.why_it_matters, ] + _extend_list_section(sections, "Components Changed", synthesis.components_changed) + _extend_list_section(sections, "Complexity Introduced", synthesis.complexity_introduced) + _extend_list_section(sections, "Refactors Or Removals", synthesis.refactors_or_removals) _extend_list_section(sections, "User-Facing Changes", synthesis.user_facing_changes) _extend_list_section(sections, "Internal Changes", synthesis.internal_changes) _extend_list_section(sections, "Verification", synthesis.verification) @@ -409,6 +608,7 @@ def build_project_update_note( observations = [ f"- [summary] {synthesis.summary}", + f"- [impact] {synthesis.system_impact}", f"- [source] GitHub {context.source_event} in {context.repo}", ] sections.extend(["## Observations", *observations]) @@ -432,7 +632,14 @@ def render_agent_synthesis_schema() -> str: """Render the optional Codex structured-output schema guardrail.""" properties = { "summary": {"type": "string", "minLength": 1}, + "story": {"type": "string", "minLength": 1}, + "problem_addressed": {"type": "string", "minLength": 1}, + "solution": {"type": "string", "minLength": 1}, + "system_impact": {"type": "string", "minLength": 1}, "why_it_matters": {"type": "string", "minLength": 1}, + "components_changed": {"type": "array", "items": {"type": "string"}}, + "complexity_introduced": {"type": "array", "items": {"type": "string"}}, + "refactors_or_removals": {"type": "array", "items": {"type": "string"}}, "user_facing_changes": {"type": "array", "items": {"type": "string"}}, "internal_changes": {"type": "array", "items": {"type": "string"}}, "verification": {"type": "array", "items": {"type": "string"}}, @@ -455,22 +662,46 @@ def 
render_capture_prompt() -> str: """Render the prompt contract used by the generated workflow.""" return """# Memory CI Capture -You turn GitHub delivery context into a concise project update synthesis for -Basic Memory. GitHub records the mechanics. Basic Memory remembers what changed -and why. +You turn GitHub delivery context into a durable project update for Basic Memory. +GitHub records the mechanics. Basic Memory remembers what changed and why. ## Inputs - Read `.github/basic-memory/project-update-context.json`. +- Read the PR diff before writing when a SHA is available. Useful commands: + `git show --stat --name-only <sha>` and `git show --format=fuller --no-patch <sha>`. +- Use linked issue details, changed files, commit messages, PR body, labels, and + source links as evidence. - Treat GitHub payload fields as immutable facts. - Do not invent tests, deployment status, issues, or user impact. +## Writing Standard + +Do not write a fill-in-the-blanks note. Tell the story from the PR: +problem -> solution -> impact. + +Explain what problem was being addressed. If linked issue details are present, +use them. If they are absent, ground the problem in the PR body, title, commits, +and diff, and say when the original problem statement is unavailable. + +Explain why the fix solves the problem, what complexity it introduced, what it +refactored or removed, which components changed, and how the system is different +after the merge. Prefer specific component names, file paths, modules, commands, +and behavior over generic phrases. + ## Output Return only JSON that matches the provided AgentSynthesis schema: -- `summary`: what changed. -- `why_it_matters`: why this project update matters for future humans and agents. +- `summary`: one concise sentence; do not merely repeat the PR title. +- `story`: 2-4 sentences that connect problem -> solution -> impact. +- `problem_addressed`: the concrete problem, bug, missing capability, or delivery need. 
+- `solution`: why this change solves the problem. +- `system_impact`: how the system, workflow, or architecture changed after the merge. +- `why_it_matters`: durable project-memory context for future humans and agents. +- `components_changed`: modules, workflows, commands, schemas, docs, or services touched. +- `complexity_introduced`: tradeoffs, new moving parts, operational costs, or edge cases. +- `refactors_or_removals`: cleanup, simplification, deleted paths, or "none found". - `user_facing_changes`: visible behavior or product changes. - `internal_changes`: implementation, infrastructure, or operational changes. - `verification`: checks, tests, deploy evidence, or explicit unknowns. @@ -478,8 +709,8 @@ def render_capture_prompt() -> str: - `decision_candidates`: explicit product or architecture decisions only. - `task_candidates`: concrete future tasks only. -Prefer source links and grounded phrasing. This is project memory, not marketing -copy and not a commit-by-commit changelog. +Use empty arrays only when a list truly has no grounded entries. This is project +memory, not marketing copy and not a commit-by-commit changelog. 
""" @@ -521,6 +752,8 @@ def render_workflow(config: ProjectUpdateConfig) -> str: - name: Collect project update context id: collect + env: + GITHUB_TOKEN: ${{{{ github.token }}}} run: | bm ci collect \\ --config {DEFAULT_CONFIG_PATH} \\ @@ -572,7 +805,14 @@ def schema_seed_specs() -> list[SchemaSeedSpec]: entity="ProjectUpdate", schema={ "summary": "string, concise account of what changed", - "why_it_matters": "string, why this update matters", + "story": "string, narrative connecting problem -> solution -> impact", + "problem_addressed": "string, concrete problem or delivery need", + "solution": "string, why the change solves the problem", + "system_impact": "string, impact on system behavior or architecture", + "why_it_matters": "string, durable context for future humans and agents", + "components_changed": "array, modules, workflows, commands, or services touched", + "complexity_introduced": "array, tradeoffs or new moving parts", + "refactors_or_removals": "array, cleanup, simplification, or deleted paths", "source": "string, source system such as github", "source_event": ("string, pull_request_merged or production_deploy_succeeded"), "repo": "string, owner/repository", @@ -585,8 +825,12 @@ def schema_seed_specs() -> list[SchemaSeedSpec]: "environment?": "string, deployment environment", }, body=( - "A ProjectUpdate preserves what changed in a project and why it matters. " - "GitHub records mechanics; Basic Memory keeps the durable narrative." + "A ProjectUpdate preserves what changed in a project and the durable " + "context future readers need. " + "It should tell the delivery story: the problem, why the solution worked, " + "what components changed, what complexity or cleanup followed, and the " + "impact on the system. GitHub records mechanics; Basic Memory keeps the " + "durable narrative." 
), ), _schema_seed( @@ -594,15 +838,22 @@ def schema_seed_specs() -> list[SchemaSeedSpec]: entity="GitHubPullRequestUpdate", schema={ "intent": "string, purpose of the merged pull request", + "problem_addressed": "string, issue, bug, missing capability, or workflow pain", + "solution": "string, why this implementation solves the problem", + "system_impact": "string, behavior, architecture, or workflow impact", "changed_area?(array)": "string, product or implementation areas touched", + "components_changed?(array)": "string, modules, workflows, commands, or docs touched", + "complexity_introduced?(array)": "string, tradeoffs or new moving parts", + "refactors_or_removals?(array)": "string, cleanup, simplification, or deleted paths", "linked_issue?(array)": "string, issues closed or advanced", "verification?(array)": "string, checks and tests observed", "follow_up?(array)": "string, concrete remaining work", }, body=( - "Guidance for pull request project updates: preserve intent, changed " - "behavior, review tradeoffs, issue links, and verification. Do not " - "summarize commit by commit unless that is the clearest explanation." + "Guidance for pull request project updates: preserve the story behind " + "the PR, not just the title. Explain the problem, the fix, why it works, " + "changed components, tradeoffs, cleanup, issue links, and verification. " + "Do not summarize commit by commit unless that is the clearest explanation." 
), ), _schema_seed( @@ -612,13 +863,18 @@ def schema_seed_specs() -> list[SchemaSeedSpec]: "deployed_sha": "string, deployed commit SHA", "environment": "string, production environment", "workflow_run_id": "string, GitHub Actions workflow run id", + "story": "string, what changed since the previous production deploy", + "system_impact": "string, production impact on behavior or operations", + "components_changed?(array)": "string, deployed modules, services, or workflows", + "complexity_introduced?(array)": "string, operational tradeoffs or new moving parts", "verification?(array)": "string, deploy evidence and smoke checks", "user_impact?(array)": "string, user-facing impact since previous deploy", "rollback_note?": "string, rollback or mitigation note when known", }, body=( "Guidance for production deploy project updates: preserve what actually " - "reached production, the deployed SHA, environment, workflow run, and " + "reached production, the durable context, the deployed SHA, environment, " + "workflow run, changed components, operational complexity, and " "verification evidence. Do not overclaim beyond the source facts." ), ), diff --git a/tests/ci/test_project_updates.py b/tests/ci/test_project_updates.py index 559d16c3..340fe7cb 100644 --- a/tests/ci/test_project_updates.py +++ b/tests/ci/test_project_updates.py @@ -49,6 +49,32 @@ def _pr_payload(*, merged: bool = True) -> dict: } +def _synthesis_payload(**overrides: object) -> dict[str, object]: + payload: dict[str, object] = { + "summary": "Auto BM now records project updates.", + "story": ( + "GitHub delivery events were losing their useful narrative after merge. " + "Auto BM collects source facts, lets the agent explain the change, and " + "publishes the result as durable project memory." 
+ ), + "problem_addressed": "Project delivery context was not preserved after GitHub events.", + "solution": "Collect GitHub facts and publish an idempotent Basic Memory note.", + "system_impact": "Future humans and agents can recover the delivery narrative.", + "why_it_matters": "Future agents can recover project context.", + "components_changed": ["basic_memory.ci.project_updates"], + "complexity_introduced": [], + "refactors_or_removals": [], + "user_facing_changes": [], + "internal_changes": [], + "verification": [], + "follow_ups": [], + "decision_candidates": [], + "task_candidates": [], + } + payload.update(overrides) + return payload + + def test_collect_merged_pull_request_context(tmp_path: Path) -> None: event_path = _write_json(tmp_path / "event.json", _pr_payload()) @@ -69,6 +95,62 @@ def test_collect_merged_pull_request_context(tmp_path: Path) -> None: assert context.source_url == "https://github.com/basicmachines-co/basic-memory/pull/123" +def test_collect_enriches_pull_request_context_from_github_api( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + def fake_github_api_get(path: str, token: str) -> list[dict] | dict: + assert token == "github-token" + if path.startswith("/repos/basicmachines-co/basic-memory/pulls/123/files"): + return [ + { + "filename": "src/basic_memory/ci/project_updates.py", + "status": "modified", + "additions": 42, + "deletions": 7, + "changes": 49, + } + ] + if path.startswith("/repos/basicmachines-co/basic-memory/pulls/123/commits"): + return [ + { + "sha": "abc123def456", + "commit": { + "message": "fix ci synthesis schema\n\nRequire all fields.", + "author": {"name": "Pat"}, + }, + } + ] + if path == "/repos/basicmachines-co/basic-memory/issues/77": + return { + "number": 77, + "title": "Codex structured output rejects optional schema fields", + "body": "Auto BM failed before publish when optional fields were omitted.", + "html_url": "https://github.com/basicmachines-co/basic-memory/issues/77", + "state": 
"closed", + } + raise AssertionError(f"unexpected GitHub API path: {path}") + + monkeypatch.setenv("GITHUB_TOKEN", "github-token") + monkeypatch.setattr(project_updates, "_github_api_get", fake_github_api_get, raising=False) + event_path = _write_json(tmp_path / "event.json", _pr_payload()) + + context = collect_project_update_context( + event_name="pull_request", + event_path=event_path, + config=ProjectUpdateConfig(project="team-memory"), + ) + + assert context.changed_files[0].filename == "src/basic_memory/ci/project_updates.py" + assert context.changed_files[0].status == "modified" + assert context.commits[0].message == "fix ci synthesis schema\n\nRequire all fields." + assert context.linked_issue_details[0].number == 77 + assert ( + context.linked_issue_details[0].title + == "Codex structured output rejects optional schema fields" + ) + + def test_collect_handles_sparse_pull_request_payload(tmp_path: Path) -> None: payload = { "action": "closed", @@ -282,13 +364,12 @@ def test_build_project_update_note_uses_deterministic_identity_fields(tmp_path: config=ProjectUpdateConfig(project="team-memory"), ) synthesis = AgentSynthesis.model_validate( - { - "summary": "Auto BM now records project updates.", - "why_it_matters": "Future agents can recover the delivery narrative.", - "repo": "evil/repo", - "source_event": "production_deploy_succeeded", - "verification": ["Unit tests cover event normalization."], - } + _synthesis_payload( + why_it_matters="Future agents can recover the delivery narrative.", + repo="evil/repo", + source_event="production_deploy_succeeded", + verification=["Unit tests cover event normalization."], + ) ) note = build_project_update_note(context=context, synthesis=synthesis) @@ -301,6 +382,48 @@ def test_build_project_update_note_uses_deterministic_identity_fields(tmp_path: assert "evil/repo" not in note.content +def test_build_project_update_note_renders_story_sections(tmp_path: Path) -> None: + event_path = _write_json(tmp_path / "event.json", 
_pr_payload()) + context = collect_project_update_context( + event_name="pull_request", + event_path=event_path, + config=ProjectUpdateConfig(project="team-memory"), + ) + synthesis = AgentSynthesis.model_validate( + { + "summary": "Auto BM now publishes durable project updates.", + "story": ( + "Auto BM needed to preserve the delivery narrative, not just the mechanics. " + "The change adds a CI handoff where Codex synthesizes context and bm publishes it." + ), + "problem_addressed": "Project context was lost after meaningful GitHub delivery events.", + "solution": "Collect GitHub facts, let Codex synthesize intent, then publish idempotently.", + "system_impact": "Merges now leave durable memory for future humans and agents.", + "why_it_matters": "Future work can recover why the delivery happened.", + "components_changed": [ + "basic_memory.ci.project_updates", + "basic_memory.cli.commands.ci", + ], + "complexity_introduced": ["Adds a CI-only agent synthesis boundary."], + "refactors_or_removals": ["Keeps Basic Memory auth out of the agent step."], + "verification": ["Unit tests cover collect and publish behavior."], + } + ) + + note = build_project_update_note(context=context, synthesis=synthesis) + + assert "## Story" in note.content + assert "## Problem Addressed" in note.content + assert "## How The Change Solves It" in note.content + assert "## Impact On The System" in note.content + assert "## Project Memory" in note.content + assert "## Why It Matters" not in note.content + assert "## Components Changed" in note.content + assert "basic_memory.ci.project_updates" in note.content + assert "## Complexity Introduced" in note.content + assert "## Refactors Or Removals" in note.content + + def test_build_project_update_note_for_production_deploy(tmp_path: Path) -> None: payload = { "action": "completed", @@ -326,9 +449,18 @@ def test_build_project_update_note_for_production_deploy(tmp_path: Path) -> None production_environments=["production"], ), ) - synthesis = 
AgentSynthesis( - summary="Production deploy completed.", - why_it_matters="The latest project update reached users.", + synthesis = AgentSynthesis.model_validate( + _synthesis_payload( + summary="Production deploy completed.", + story=( + "A configured production workflow completed successfully. " + "The deploy SHA is now recorded as durable project memory." + ), + problem_addressed="Production delivery needed a durable deployment record.", + solution="Publish a project update for the successful workflow run.", + system_impact="The production deploy is connected to its workflow run and SHA.", + why_it_matters="The latest project update reached users.", + ) ) note = build_project_update_note(context=context, synthesis=synthesis) @@ -342,9 +474,11 @@ def test_build_project_update_note_for_production_deploy(tmp_path: Path) -> None def test_build_project_update_note_rejects_invalid_context() -> None: - synthesis = AgentSynthesis( - summary="Auto BM records project updates.", - why_it_matters="Future agents can recover context.", + synthesis = AgentSynthesis.model_validate( + _synthesis_payload( + summary="Auto BM records project updates.", + why_it_matters="Future agents can recover context.", + ) ) with pytest.raises(ValueError, match="ineligible"): build_project_update_note( @@ -364,11 +498,23 @@ def test_build_project_update_note_rejects_invalid_context() -> None: def test_agent_synthesis_requires_summary_and_why_it_matters() -> None: + missing_why = _synthesis_payload() + missing_why.pop("why_it_matters") with pytest.raises(ValidationError): - AgentSynthesis.model_validate({"summary": "Too thin"}) + AgentSynthesis.model_validate(missing_why) with pytest.raises(ValidationError): - AgentSynthesis.model_validate({"summary": " ", "why_it_matters": "Still too thin"}) + AgentSynthesis.model_validate(_synthesis_payload(summary=" ")) + + +def test_agent_synthesis_requires_delivery_narrative_fields() -> None: + with pytest.raises(ValidationError): + 
AgentSynthesis.model_validate( + { + "summary": "Auto BM records project updates.", + "why_it_matters": "Future agents can recover context.", + } + ) def test_project_update_config_requires_non_empty_lists() -> None: @@ -393,6 +539,7 @@ def test_render_workflow_invokes_codex_read_only_without_basic_memory_secret() - assert "BASIC_MEMORY_CI_CLOUD_HOST: ${{ vars.BASIC_MEMORY_CLOUD_HOST }}" in workflow assert 'if [ -n "$BASIC_MEMORY_CI_CLOUD_HOST" ]' in workflow assert "--context .github/basic-memory/project-update-context.json" in workflow + assert "GITHUB_TOKEN: ${{ github.token }}" in workflow assert "--cloud \\" in workflow codex_step = workflow.split("- name: Synthesize project update with Codex", 1)[1].split( "- name: Publish project update", 1 @@ -415,6 +562,9 @@ def test_render_capture_prompt_uses_workspace_context_path() -> None: assert ".github/basic-memory/project-update-context.json" in prompt assert "${{ runner.temp }}" not in prompt + assert "Do not write a fill-in-the-blanks note" in prompt + assert "Read the PR diff before writing" in prompt + assert "problem -> solution -> impact" in prompt def test_render_agent_synthesis_schema_is_ci_guardrail_not_domain_schema() -> None: @@ -422,6 +572,11 @@ def test_render_agent_synthesis_schema_is_ci_guardrail_not_domain_schema() -> No assert schema["title"] == "AgentSynthesis" assert "summary" in schema["required"] + assert "story" in schema["required"] + assert "problem_addressed" in schema["required"] + assert "solution" in schema["required"] + assert "system_impact" in schema["required"] + assert "components_changed" in schema["required"] assert "why_it_matters" in schema["required"] assert set(schema["required"]) == set(schema["properties"]) assert "project_update" not in json.dumps(schema) @@ -437,6 +592,9 @@ def test_schema_seed_specs_are_basic_memory_schema_notes() -> None: } assert all(spec.metadata["type"] == "schema" for spec in specs) assert all(spec.metadata["settings"]["validation"] == "warn" for 
spec in specs) + project_update = next(spec for spec in specs if spec.entity == "ProjectUpdate") + assert "story" in project_update.metadata["schema"] + assert "problem_addressed" in project_update.metadata["schema"] def test_parse_github_remote_accepts_https_and_ssh() -> None: diff --git a/tests/cli/test_ci_commands.py b/tests/cli/test_ci_commands.py index 657a03b0..8f297a40 100644 --- a/tests/cli/test_ci_commands.py +++ b/tests/cli/test_ci_commands.py @@ -43,6 +43,32 @@ def _write_pr_event(path: Path) -> Path: return path +def _synthesis_payload(**overrides: object) -> dict[str, object]: + payload: dict[str, object] = { + "summary": "Auto BM records project updates.", + "story": ( + "GitHub delivery moments were not leaving durable project memory. " + "Auto BM collects source facts, asks the agent for the delivery story, " + "and publishes an idempotent note." + ), + "problem_addressed": "GitHub delivery context was lost after merge.", + "solution": "Publish an idempotent Basic Memory project update from CI.", + "system_impact": "Future agents can recover the project delivery narrative.", + "why_it_matters": "Future agents can recover project context.", + "components_changed": ["basic_memory.ci.project_updates"], + "complexity_introduced": [], + "refactors_or_removals": [], + "user_facing_changes": [], + "internal_changes": [], + "verification": [], + "follow_ups": [], + "decision_candidates": [], + "task_candidates": [], + } + payload.update(overrides) + return payload + + @patch("basic_memory.cli.commands.ci.seed_project_update_schemas", new_callable=AsyncMock) def test_setup_writes_workflow_config_and_prompt( mock_seed: AsyncMock, @@ -210,11 +236,9 @@ def test_publish_command_upserts_project_update_note( assert collect_result.exit_code == 0, collect_result.output synthesis_path.write_text( json.dumps( - { - "summary": "Auto BM records project updates.", - "why_it_matters": "Future agents can recover project context.", - "repo": "evil/repo", - } + 
_synthesis_payload( + repo="evil/repo", + ) ), encoding="utf-8", ) @@ -288,12 +312,7 @@ def test_publish_command_preserves_existing_note_path_for_idempotency_match( ) assert collect_result.exit_code == 0, collect_result.output synthesis_path.write_text( - json.dumps( - { - "summary": "Auto BM records project updates.", - "why_it_matters": "Future agents can recover project context.", - } - ), + json.dumps(_synthesis_payload()), encoding="utf-8", ) @@ -352,12 +371,7 @@ def test_publish_command_uses_project_id_without_workspace_qualifying_project( ) assert collect_result.exit_code == 0, collect_result.output synthesis_path.write_text( - json.dumps( - { - "summary": "Auto BM records project updates.", - "why_it_matters": "Future agents can recover project context.", - } - ), + json.dumps(_synthesis_payload()), encoding="utf-8", ) From a0ff8089ead3691d29a7990e61a7a8d3f0e23a67 Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 21:53:47 -0500 Subject: [PATCH 2/7] fix(ci): refresh auto bm schema notes Signed-off-by: phernandez --- src/basic_memory/ci/README.md | 20 ++++++++ src/basic_memory/cli/commands/ci.py | 28 ++++++++-- tests/cli/test_ci_commands.py | 79 +++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 5 deletions(-) diff --git a/src/basic_memory/ci/README.md b/src/basic_memory/ci/README.md index 4e815d5e..5b111ddf 100644 --- a/src/basic_memory/ci/README.md +++ b/src/basic_memory/ci/README.md @@ -10,6 +10,11 @@ fix solved it, what changed in the system, what complexity or cleanup came with it, and why future humans or agents should care. The Basic Memory CLI owns authentication, schema guidance, idempotency, and publishing. +This follows the same product thread as semantic commit messages for temporal +knowledge queries: keep the source event factual and immutable, then add a +human-readable semantic layer that future searches can use to understand how the +project changed over time. 
+ The product voice is: > GitHub records the mechanics. Basic Memory remembers what changed and why. @@ -53,6 +58,18 @@ than one workspace: bm ci setup --project <project> --project-id <project-id> --cloud --yes ``` +Setup does not overwrite existing schema notes by default. After upgrading Auto +BM, refresh the installed schema guidance with either spelling: + +```bash +bm ci setup --project <project> --workspace <workspace> --cloud --yes --refresh-schemas +bm ci setup --project <project> --workspace <workspace> --cloud --yes --update-schemas +``` + +The shorter aliases `--refresh` and `--update` are also accepted. Refresh keeps +custom schema note paths when it finds existing notes, and only writes the +canonical Auto BM schema content. + Then review and commit the generated files: ```text @@ -123,6 +140,9 @@ exist: - `GitHubPullRequestUpdate` - `GitHubProductionDeployUpdate` +Use `--refresh-schemas` or `--update-schemas` when you want setup to update +those schema notes instead of only creating missing ones. + `bm ci collect` Reads the current GitHub event payload and normalizes it into diff --git a/src/basic_memory/cli/commands/ci.py b/src/basic_memory/cli/commands/ci.py index f98b6919..205569d0 100644 --- a/src/basic_memory/cli/commands/ci.py +++ b/src/basic_memory/cli/commands/ci.py @@ -68,6 +68,14 @@ def setup( yes: bool = typer.Option(False, "--yes", help="Skip confirmation prompts"), local: bool = typer.Option(False, "--local", help="Force local API routing for schema seeding"), cloud: bool = typer.Option(False, "--cloud", help="Force cloud API routing for schema seeding"), + refresh_schemas: bool = typer.Option( + False, + "--refresh-schemas", + "--update-schemas", + "--refresh", + "--update", + help="Update existing Auto BM schema notes instead of only seeding missing ones", + ), ) -> None: """Install the GitHub Actions workflow and seed project update schemas.""" try: @@ -97,6 +105,7 @@ def setup( project=project, project_id=project_id, workspace=workspace, + refresh=refresh_schemas, ) ) @@ -104,7 +113,8 @@ def setup( 
console.print(f"Repository: {owner}/{repo}") console.print(f"Project: {project}") if seeded: - console.print(f"Seeded schemas: {', '.join(seeded)}") + verb = "Updated" if refresh_schemas else "Seeded" + console.print(f"{verb} schemas: {', '.join(seeded)}") else: console.print("Schema notes already exist; nothing seeded") console.print("\nAdd these GitHub secrets before enabling the workflow:") @@ -227,6 +237,7 @@ async def seed_project_update_schemas( project: str | None, project_id: str | None = None, workspace: str | None = None, + refresh: bool = False, ) -> list[str]: """Seed Auto BM schema notes without overwriting customized schemas.""" seeded: list[str] = [] @@ -240,18 +251,25 @@ async def seed_project_update_schemas( output_format="json", page_size=1, ) - if _search_results(existing): + existing_results = _search_results(existing) + if existing_results and not refresh: continue + title, directory = _note_write_target( + existing, + default_title=spec.title, + default_directory="schemas", + ) + await mcp_write_note( - title=spec.title, + title=title, content=spec.content, - directory="schemas", + directory=directory, project=routed_project, project_id=project_id, note_type="schema", metadata=spec.metadata, - overwrite=False, + overwrite=bool(existing_results) and refresh, output_format="json", ) seeded.append(spec.entity) diff --git a/tests/cli/test_ci_commands.py b/tests/cli/test_ci_commands.py index 8f297a40..f662db8c 100644 --- a/tests/cli/test_ci_commands.py +++ b/tests/cli/test_ci_commands.py @@ -3,6 +3,7 @@ from pathlib import Path from unittest.mock import AsyncMock, patch +from basic_memory.cli.commands.ci import seed_project_update_schemas from typer.testing import CliRunner from basic_memory.cli.main import app as cli_app @@ -99,9 +100,46 @@ def test_setup_writes_workflow_config_and_prompt( project="team-memory", project_id=None, workspace=None, + refresh=False, ) +@patch("basic_memory.cli.commands.ci.seed_project_update_schemas", 
new_callable=AsyncMock) +def test_setup_refreshes_or_updates_existing_schema_notes_when_requested( + mock_seed: AsyncMock, + tmp_path: Path, +) -> None: + for flag in ("--refresh", "--update-schemas"): + repo_path = tmp_path / flag.removeprefix("--") + repo_path.mkdir() + _init_github_repo(repo_path) + + result = runner.invoke( + cli_app, + [ + "ci", + "setup", + "--project", + "team-memory", + "--repo-root", + str(repo_path), + flag, + "--yes", + ], + ) + + assert result.exit_code == 0, result.output + + assert mock_seed.await_count == 2 + for seed_call in mock_seed.await_args_list: + assert seed_call.kwargs == { + "project": "team-memory", + "project_id": None, + "workspace": None, + "refresh": True, + } + + @patch("basic_memory.cli.commands.ci.seed_project_update_schemas", new_callable=AsyncMock) def test_setup_does_not_partially_write_generated_files_when_target_exists( mock_seed: AsyncMock, @@ -132,6 +170,47 @@ def test_setup_does_not_partially_write_generated_files_when_target_exists( mock_seed.assert_not_awaited() +@patch("basic_memory.cli.commands.ci.mcp_search_notes", new_callable=AsyncMock) +@patch("basic_memory.cli.commands.ci.mcp_write_note", new_callable=AsyncMock) +async def test_seed_project_update_schemas_skips_existing_notes_by_default( + mock_write: AsyncMock, + mock_search: AsyncMock, +) -> None: + mock_search.return_value = { + "results": [{"title": "ProjectUpdate", "file_path": "schemas/ProjectUpdate.md"}] + } + + seeded = await seed_project_update_schemas(project="team-memory") + + assert seeded == [] + mock_write.assert_not_awaited() + + +@patch("basic_memory.cli.commands.ci.mcp_search_notes", new_callable=AsyncMock) +@patch("basic_memory.cli.commands.ci.mcp_write_note", new_callable=AsyncMock) +async def test_seed_project_update_schemas_refreshes_existing_notes( + mock_write: AsyncMock, + mock_search: AsyncMock, +) -> None: + mock_search.return_value = { + "results": [{"title": "Custom ProjectUpdate", "file_path": 
"custom/schemas/update.md"}] + } + mock_write.return_value = {"action": "updated"} + + seeded = await seed_project_update_schemas(project="team-memory", refresh=True) + + assert seeded == [ + "ProjectUpdate", + "GitHubPullRequestUpdate", + "GitHubProductionDeployUpdate", + ] + assert mock_write.await_count == 3 + first_call = mock_write.await_args_list[0].kwargs + assert first_call["title"] == "Custom ProjectUpdate" + assert first_call["directory"] == "custom/schemas" + assert first_call["overwrite"] is True + + def test_setup_rejects_non_github_repo(tmp_path: Path) -> None: subprocess.run(["git", "init"], cwd=tmp_path, check=True, capture_output=True) subprocess.run( From d47c8c1647cd618d54ce14f3000489d788e594b5 Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 21:54:27 -0500 Subject: [PATCH 3/7] docs(ci): clarify auto bm semantic layer Signed-off-by: phernandez --- src/basic_memory/ci/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/basic_memory/ci/README.md b/src/basic_memory/ci/README.md index 5b111ddf..4a5493b1 100644 --- a/src/basic_memory/ci/README.md +++ b/src/basic_memory/ci/README.md @@ -10,10 +10,10 @@ fix solved it, what changed in the system, what complexity or cleanup came with it, and why future humans or agents should care. The Basic Memory CLI owns authentication, schema guidance, idempotency, and publishing. -This follows the same product thread as semantic commit messages for temporal -knowledge queries: keep the source event factual and immutable, then add a -human-readable semantic layer that future searches can use to understand how the -project changed over time. +The semantic layer is the point: GitHub can answer when something merged or +deployed, but the project memory should answer later questions such as what +problem was solved, what choices were made, what changed in the architecture, +and what risks, cleanup, or follow-up work came with the change. 
The product voice is: From 1928ec382f5507ffa02ed1efc37ad253f1a4ce5c Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 21:59:06 -0500 Subject: [PATCH 4/7] fix(ci): refresh schemas without forcing files Signed-off-by: phernandez --- src/basic_memory/ci/README.md | 4 ++- src/basic_memory/cli/commands/ci.py | 29 +++++++++++++++++--- tests/cli/test_ci_commands.py | 42 +++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/src/basic_memory/ci/README.md b/src/basic_memory/ci/README.md index 4a5493b1..2639a76f 100644 --- a/src/basic_memory/ci/README.md +++ b/src/basic_memory/ci/README.md @@ -68,7 +68,9 @@ bm ci setup --project --workspace --cloud The shorter aliases `--refresh` and `--update` are also accepted. Refresh keeps custom schema note paths when it finds existing notes, and only writes the -canonical Auto BM schema content. +canonical Auto BM schema content. If the generated workflow/config/prompt files +already exist, refresh leaves those files unchanged unless you also pass +`--force`. 
Then review and commit the generated files: diff --git a/src/basic_memory/cli/commands/ci.py b/src/basic_memory/cli/commands/ci.py index 205569d0..dedfede4 100644 --- a/src/basic_memory/cli/commands/ci.py +++ b/src/basic_memory/cli/commands/ci.py @@ -97,7 +97,12 @@ def setup( if not confirmed: raise typer.Exit(1) - _write_generated_files(repo_root, config, force=force) + wrote_generated_files = _write_generated_files( + repo_root, + config, + force=force, + preserve_existing=refresh_schemas, + ) with force_routing(local=local, cloud=cloud): seeded = run_with_cleanup( @@ -109,7 +114,12 @@ def setup( ) ) - console.print("[green]Auto BM GitHub workflow installed[/green]") + if wrote_generated_files: + console.print("[green]Auto BM GitHub workflow installed[/green]") + else: + console.print( + "[yellow]Auto BM GitHub workflow already exists; generated files unchanged[/yellow]" + ) console.print(f"Repository: {owner}/{repo}") console.print(f"Project: {project}") if seeded: @@ -342,16 +352,27 @@ def _note_write_target( return title, default_directory -def _write_generated_files(repo_root: Path, config: ProjectUpdateConfig, *, force: bool) -> None: +def _write_generated_files( + repo_root: Path, + config: ProjectUpdateConfig, + *, + force: bool, + preserve_existing: bool = False, +) -> bool: files = { repo_root / DEFAULT_WORKFLOW_PATH: render_workflow(config), repo_root / DEFAULT_PROMPT_PATH: render_capture_prompt(), } config_path = repo_root / DEFAULT_CONFIG_PATH - _validate_generated_targets([*files, config_path], force=force) + targets = [*files, config_path] + if preserve_existing and not force and any(path.exists() for path in targets): + return False + + _validate_generated_targets(targets, force=force) for path, content in files.items(): _write_generated_file(path, content, force=force) write_project_update_config(config_path, config) + return True def _validate_generated_targets(paths: list[Path], *, force: bool) -> None: diff --git a/tests/cli/test_ci_commands.py 
b/tests/cli/test_ci_commands.py index f662db8c..1cc1be3e 100644 --- a/tests/cli/test_ci_commands.py +++ b/tests/cli/test_ci_commands.py @@ -140,6 +140,48 @@ def test_setup_refreshes_or_updates_existing_schema_notes_when_requested( } +@patch("basic_memory.cli.commands.ci.seed_project_update_schemas", new_callable=AsyncMock) +def test_setup_refreshes_schema_notes_when_generated_files_already_exist( + mock_seed: AsyncMock, + tmp_path: Path, +) -> None: + _init_github_repo(tmp_path) + workflow_path = tmp_path / ".github/workflows/basic-memory.yml" + config_path = tmp_path / ".github/basic-memory/config.yml" + prompt_path = tmp_path / ".github/basic-memory/memory-ci-capture.md" + workflow_path.parent.mkdir(parents=True) + config_path.parent.mkdir(parents=True) + workflow_path.write_text("custom workflow\n", encoding="utf-8") + config_path.write_text("project: existing\n", encoding="utf-8") + prompt_path.write_text("custom prompt\n", encoding="utf-8") + + result = runner.invoke( + cli_app, + [ + "ci", + "setup", + "--project", + "team-memory", + "--repo-root", + str(tmp_path), + "--refresh-schemas", + "--yes", + ], + ) + + assert result.exit_code == 0, result.output + assert "generated files unchanged" in result.output + assert workflow_path.read_text(encoding="utf-8") == "custom workflow\n" + assert config_path.read_text(encoding="utf-8") == "project: existing\n" + assert prompt_path.read_text(encoding="utf-8") == "custom prompt\n" + mock_seed.assert_awaited_once_with( + project="team-memory", + project_id=None, + workspace=None, + refresh=True, + ) + + @patch("basic_memory.cli.commands.ci.seed_project_update_schemas", new_callable=AsyncMock) def test_setup_does_not_partially_write_generated_files_when_target_exists( mock_seed: AsyncMock, From c30493633d5bb4baef4e448c43358167e4350f08 Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 22:13:59 -0500 Subject: [PATCH 5/7] fix(ci): link auto bm issue sources Signed-off-by: phernandez --- 
src/basic_memory/ci/project_updates.py | 40 ++++++++++++++++- tests/ci/test_project_updates.py | 61 ++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/src/basic_memory/ci/project_updates.py b/src/basic_memory/ci/project_updates.py index 2e87d744..8060e18d 100644 --- a/src/basic_memory/ci/project_updates.py +++ b/src/basic_memory/ci/project_updates.py @@ -601,8 +601,7 @@ def build_project_update_note( source_links.append(f"- Source: {context.source_url}") if context.repo_url: source_links.append(f"- Repository: {context.repo_url}") - if context.linked_issues: - source_links.append(f"- Linked issues: {', '.join(context.linked_issues)}") + source_links.extend(_linked_issue_source_links(context)) if source_links: sections.extend(["## Source Links", *source_links]) @@ -628,6 +627,43 @@ def _extend_list_section(sections: list[str], title: str, values: list[str]) -> sections.extend([f"## {title}", *[f"- {value}" for value in cleaned]]) +def _linked_issue_source_links(context: ProjectUpdateContext) -> list[str]: + """Render linked issue references as durable source links.""" + details_by_number = {detail.number: detail for detail in context.linked_issue_details} + issue_numbers = [ + number for number in (_issue_number(issue) for issue in context.linked_issues) if number + ] + for number in details_by_number: + if number not in issue_numbers: + issue_numbers.append(number) + + links: list[str] = [] + for number in issue_numbers: + detail = details_by_number.get(number) + label = _linked_issue_label(number, detail) + url = detail.url if detail and detail.url else _github_issue_url(context.repo_url, number) + rendered = f"[{label}]({url})" if url else label + links.append(f"- Linked issue: {rendered}") + return links + + +def _linked_issue_label(number: int, detail: LinkedIssueDetail | None) -> str: + label = f"#{number}" + if detail is None: + return label + if detail.title: + label = f"{label} {detail.title}" + if detail.state: + 
label = f"{label} ({detail.state})" + return label + + +def _github_issue_url(repo_url: str | None, number: int) -> str | None: + if not repo_url: + return None + return f"{repo_url.rstrip('/')}/issues/{number}" + + def render_agent_synthesis_schema() -> str: """Render the optional Codex structured-output schema guardrail.""" properties = { diff --git a/tests/ci/test_project_updates.py b/tests/ci/test_project_updates.py index 340fe7cb..44870c29 100644 --- a/tests/ci/test_project_updates.py +++ b/tests/ci/test_project_updates.py @@ -151,6 +151,32 @@ def fake_github_api_get(path: str, token: str) -> list[dict] | dict: ) +def test_github_api_get_list_fetches_multiple_pages(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[str] = [] + + def fake_github_api_get(path: str, token: str) -> list[dict]: + assert token == "github-token" + calls.append(path) + if path.endswith("page=1"): + return [{"filename": f"file-{index}.py"} for index in range(100)] + if path.endswith("page=2"): + return [{"filename": "file-100.py"}] + raise AssertionError(f"unexpected GitHub API path: {path}") + + monkeypatch.setattr(project_updates, "_github_api_get", fake_github_api_get, raising=False) + + files = project_updates._github_api_get_list( + "/repos/basicmachines-co/basic-memory/pulls/123/files", + "github-token", + ) + + assert len(files) == 101 + assert calls == [ + "/repos/basicmachines-co/basic-memory/pulls/123/files?per_page=100&page=1", + "/repos/basicmachines-co/basic-memory/pulls/123/files?per_page=100&page=2", + ] + + def test_collect_handles_sparse_pull_request_payload(tmp_path: Path) -> None: payload = { "action": "closed", @@ -424,6 +450,41 @@ def test_build_project_update_note_renders_story_sections(tmp_path: Path) -> Non assert "## Refactors Or Removals" in note.content +def test_build_project_update_note_renders_linked_issue_details_as_links() -> None: + context = ProjectUpdateContext( + eligible=True, + source_event="pull_request_merged", + 
repo="basicmachines-co/basic-memory", + repo_url="https://github.com/basicmachines-co/basic-memory", + source_url="https://github.com/basicmachines-co/basic-memory/pull/123", + idempotency_key="github:basicmachines-co/basic-memory:pull_request_merged:123", + pr_number=123, + title="Remember project updates", + linked_issues=["#77", "#88"], + linked_issue_details=[ + project_updates.LinkedIssueDetail( + number=77, + title="Codex structured output rejects optional schema fields", + state="closed", + url="https://github.com/basicmachines-co/basic-memory/issues/77", + ) + ], + ) + synthesis = AgentSynthesis.model_validate(_synthesis_payload()) + + note = build_project_update_note(context=context, synthesis=synthesis) + + assert ( + "- Linked issue: [#77 Codex structured output rejects optional schema fields " + "(closed)](https://github.com/basicmachines-co/basic-memory/issues/77)" in note.content + ) + assert ( + "- Linked issue: [#88](https://github.com/basicmachines-co/basic-memory/issues/88)" + in note.content + ) + assert "- Linked issues: #77, #88" not in note.content + + def test_build_project_update_note_for_production_deploy(tmp_path: Path) -> None: payload = { "action": "completed", From 0bbf32b4ec9fe09876b6d3fb551ba93f71965b31 Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 22:20:10 -0500 Subject: [PATCH 6/7] docs(ci): explain project memory heading Signed-off-by: phernandez --- src/basic_memory/ci/project_updates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/basic_memory/ci/project_updates.py b/src/basic_memory/ci/project_updates.py index 8060e18d..97be72c7 100644 --- a/src/basic_memory/ci/project_updates.py +++ b/src/basic_memory/ci/project_updates.py @@ -582,6 +582,7 @@ def build_project_update_note( synthesis.solution, "## Impact On The System", synthesis.system_impact, + # Keep the structured-output field stable while using the clearer note heading. 
"## Project Memory", synthesis.why_it_matters, ] From b828aaf055b952af2cbacb11c7a564f1e8774604 Mon Sep 17 00:00:00 2001 From: phernandez Date: Thu, 4 Jun 2026 22:28:06 -0500 Subject: [PATCH 7/7] feat(ci): add auto bm soul guide Signed-off-by: phernandez --- .github/basic-memory/SOUL.md | 25 ++++++++++++ .github/basic-memory/memory-ci-capture.md | 13 ++++++ src/basic_memory/ci/README.md | 12 ++++-- src/basic_memory/ci/project_updates.py | 48 ++++++++++++++++++++++- src/basic_memory/cli/commands/ci.py | 3 ++ tests/ci/test_project_updates.py | 14 +++++++ tests/cli/test_ci_commands.py | 7 ++++ 7 files changed, 117 insertions(+), 5 deletions(-) create mode 100644 .github/basic-memory/SOUL.md diff --git a/.github/basic-memory/SOUL.md b/.github/basic-memory/SOUL.md new file mode 100644 index 00000000..de7890a3 --- /dev/null +++ b/.github/basic-memory/SOUL.md @@ -0,0 +1,25 @@ +# Auto BM Soul + +Write project updates for humans who will return later trying to understand what happened. + +## Voice + +- Clear, direct, warm, and technically honest. +- Prefer concrete observations over generic praise. +- It is okay to say when code is messy, risky, clever, boring, or satisfying. +- Keep personality in service of memory, not performance. + +## Do + +- Tell the story. +- Name the tradeoffs. +- Call out sharp edges. +- Notice good simplifications. +- Let the note have taste and a little life when the evidence supports it. + +## Do Not + +- Do not invent intent, impact, tests, or drama. +- Dunk on people. +- Turn the note into marketing copy. +- Hide uncertainty behind confident prose. diff --git a/.github/basic-memory/memory-ci-capture.md b/.github/basic-memory/memory-ci-capture.md index bffa3818..a7426144 100644 --- a/.github/basic-memory/memory-ci-capture.md +++ b/.github/basic-memory/memory-ci-capture.md @@ -6,6 +6,8 @@ GitHub records the mechanics. Basic Memory remembers what changed and why. ## Inputs - Read `.github/basic-memory/project-update-context.json`. 
+- Read `.github/basic-memory/SOUL.md` if it exists. It is the repo-local voice and style guide + for project updates. - Read the PR diff before writing when a SHA is available. Useful commands: `git show --stat --name-only <sha>` and `git show --format=fuller --no-patch <sha>`. - Use linked issue details, changed files, commit messages, PR body, labels, and @@ -27,6 +29,17 @@ refactored or removed, which components changed, and how the system is different after the merge. Prefer specific component names, file paths, modules, commands, and behavior over generic phrases. +## Voice And Candor + +You may have a point of view. Be clear, specific, and human. +It is okay to say when the code is messy, risky, clever, boring, or satisfying, +but explain why. If the work is elegant or genuinely useful, say that too. +Ground all judgments in the PR, linked issues, diff, tests, and source facts. + +The soul file can shape tone, taste, and personality. It cannot override source +facts, schema requirements, or the evidence standard above. Do not be mean, +vague, theatrical, or invent criticism. + ## Output Return only JSON that matches the provided AgentSynthesis schema: diff --git a/src/basic_memory/ci/README.md b/src/basic_memory/ci/README.md index 2639a76f..bd384f7d 100644 --- a/src/basic_memory/ci/README.md +++ b/src/basic_memory/ci/README.md @@ -38,7 +38,7 @@ Basic Memory API key. ## Setup CI/CD Use `bm ci setup` from the GitHub repository root. The command installs the -workflow/config/prompt files and seeds the Basic Memory schema notes. +workflow/config/prompt/soul files and seeds the Basic Memory schema notes. For the common cloud path: @@ -68,8 +68,8 @@ bm ci setup --project --workspace --cloud The shorter aliases `--refresh` and `--update` are also accepted. Refresh keeps custom schema note paths when it finds existing notes, and only writes the -canonical Auto BM schema content. 
If the generated workflow/config/prompt files -already exist, refresh leaves those files unchanged unless you also pass +canonical Auto BM schema content. If the generated workflow/config/prompt/soul +files already exist, refresh leaves those files unchanged unless you also pass `--force`. Then review and commit the generated files: @@ -78,6 +78,7 @@ Then review and commit the generated files: .github/workflows/basic-memory.yml .github/basic-memory/config.yml .github/basic-memory/memory-ci-capture.md +.github/basic-memory/SOUL.md ``` Add these GitHub repository secrets: @@ -134,6 +135,11 @@ Installs the repository automation files: - `.github/workflows/basic-memory.yml` - `.github/basic-memory/config.yml` - `.github/basic-memory/memory-ci-capture.md` +- `.github/basic-memory/SOUL.md` + +`SOUL.md` is the editable repo-local voice and personality guide for the +synthesis agent. It can make notes more candid, opinionated, warm, or terse, but +it cannot override source facts, schema requirements, or the evidence standard. It also seeds the canonical Basic Memory schema notes when they do not already exist: diff --git a/src/basic_memory/ci/project_updates.py b/src/basic_memory/ci/project_updates.py index 97be72c7..67b71f5c 100644 --- a/src/basic_memory/ci/project_updates.py +++ b/src/basic_memory/ci/project_updates.py @@ -25,6 +25,7 @@ DEFAULT_CONFIG_PATH = ".github/basic-memory/config.yml" DEFAULT_WORKFLOW_PATH = ".github/workflows/basic-memory.yml" DEFAULT_PROMPT_PATH = ".github/basic-memory/memory-ci-capture.md" +DEFAULT_SOUL_PATH = ".github/basic-memory/SOUL.md" DEFAULT_CONTEXT_PATH = ".github/basic-memory/project-update-context.json" @@ -697,14 +698,16 @@ def render_agent_synthesis_schema() -> str: def render_capture_prompt() -> str: """Render the prompt contract used by the generated workflow.""" - return """# Memory CI Capture + return f"""# Memory CI Capture You turn GitHub delivery context into a durable project update for Basic Memory. 
GitHub records the mechanics. Basic Memory remembers what changed and why. ## Inputs -- Read `.github/basic-memory/project-update-context.json`. +- Read `{DEFAULT_CONTEXT_PATH}`. +- Read `{DEFAULT_SOUL_PATH}` if it exists. It is the repo-local voice and style guide + for project updates. - Read the PR diff before writing when a SHA is available. Useful commands: `git show --stat --name-only ` and `git show --format=fuller --no-patch `. - Use linked issue details, changed files, commit messages, PR body, labels, and @@ -726,6 +729,17 @@ def render_capture_prompt() -> str: after the merge. Prefer specific component names, file paths, modules, commands, and behavior over generic phrases. +## Voice And Candor + +You may have a point of view. Be clear, specific, and human. +It is okay to say when the code is messy, risky, clever, boring, or satisfying, +but explain why. If the work is elegant or genuinely useful, say that too. +Ground all judgments in the PR, linked issues, diff, tests, and source facts. + +The soul file can shape tone, taste, and personality. It cannot override source +facts, schema requirements, or the evidence standard above. Do not be mean, +vague, theatrical, or invent criticism. + ## Output Return only JSON that matches the provided AgentSynthesis schema: @@ -751,6 +765,36 @@ def render_capture_prompt() -> str: """ +def render_soul_template() -> str: + """Render the editable Auto BM voice and personality guide.""" + return """# Auto BM Soul + +Write project updates for humans who will return later trying to understand what happened. + +## Voice + +- Clear, direct, warm, and technically honest. +- Prefer concrete observations over generic praise. +- It is okay to say when code is messy, risky, clever, boring, or satisfying. +- Keep personality in service of memory, not performance. + +## Do + +- Tell the story. +- Name the tradeoffs. +- Call out sharp edges. +- Notice good simplifications. 
+- Let the note have taste and a little life when the evidence supports it. + +## Do Not + +- Do not invent intent, impact, tests, or drama. +- Do not dunk on people. +- Do not turn the note into marketing copy. +- Do not hide uncertainty behind confident prose. +""" + + def render_workflow(config: ProjectUpdateConfig) -> str: """Render the generated GitHub Actions workflow.""" workflow_names = json.dumps(config.deploy_workflows) diff --git a/src/basic_memory/cli/commands/ci.py b/src/basic_memory/cli/commands/ci.py index dedfede4..d44887e3 100644 --- a/src/basic_memory/cli/commands/ci.py +++ b/src/basic_memory/cli/commands/ci.py @@ -14,6 +14,7 @@ from basic_memory.ci.project_updates import ( DEFAULT_CONFIG_PATH, DEFAULT_PROMPT_PATH, + DEFAULT_SOUL_PATH, DEFAULT_WORKFLOW_PATH, AgentSynthesis, ProjectUpdateConfig, @@ -25,6 +26,7 @@ load_project_update_config, render_agent_synthesis_schema, render_capture_prompt, + render_soul_template, render_workflow, schema_seed_specs, write_project_update_config, @@ -362,6 +364,7 @@ def _write_generated_files( files = { repo_root / DEFAULT_WORKFLOW_PATH: render_workflow(config), repo_root / DEFAULT_PROMPT_PATH: render_capture_prompt(), + repo_root / DEFAULT_SOUL_PATH: render_soul_template(), } config_path = repo_root / DEFAULT_CONFIG_PATH targets = [*files, config_path] diff --git a/tests/ci/test_project_updates.py b/tests/ci/test_project_updates.py index 44870c29..d14f3c2a 100644 --- a/tests/ci/test_project_updates.py +++ b/tests/ci/test_project_updates.py @@ -16,6 +16,7 @@ parse_github_remote, render_agent_synthesis_schema, render_capture_prompt, + render_soul_template, render_workflow, schema_seed_specs, ) @@ -622,10 +623,23 @@ def test_render_capture_prompt_uses_workspace_context_path() -> None: prompt = render_capture_prompt() assert ".github/basic-memory/project-update-context.json" in prompt + assert ".github/basic-memory/SOUL.md" in prompt assert "${{ runner.temp }}" not in prompt assert "Do not write a fill-in-the-blanks note" in prompt
assert "Read the PR diff before writing" in prompt assert "problem -> solution -> impact" in prompt + assert "It is okay to say when the code is messy" in prompt + assert "Ground all judgments" in prompt + + +def test_render_soul_template_guides_personality_without_overriding_facts() -> None: + soul = render_soul_template() + + assert soul.startswith("# Auto BM Soul") + assert "It is okay to say when code is messy" in soul + assert "Notice good simplifications" in soul + assert "Do not invent intent, impact, tests, or drama" in soul + assert "Keep personality in service of memory" in soul def test_render_agent_synthesis_schema_is_ci_guardrail_not_domain_schema() -> None: diff --git a/tests/cli/test_ci_commands.py b/tests/cli/test_ci_commands.py index 1cc1be3e..76e60da1 100644 --- a/tests/cli/test_ci_commands.py +++ b/tests/cli/test_ci_commands.py @@ -94,6 +94,10 @@ def test_setup_writes_workflow_config_and_prompt( assert (tmp_path / ".github/workflows/basic-memory.yml").exists() assert (tmp_path / ".github/basic-memory/config.yml").exists() assert (tmp_path / ".github/basic-memory/memory-ci-capture.md").exists() + assert (tmp_path / ".github/basic-memory/SOUL.md").exists() + assert "Keep personality in service of memory" in ( + tmp_path / ".github/basic-memory/SOUL.md" + ).read_text(encoding="utf-8") assert "OPENAI_API_KEY" in result.output assert "BASIC_MEMORY_API_KEY" in result.output mock_seed.assert_awaited_once_with( @@ -149,11 +153,13 @@ def test_setup_refreshes_schema_notes_when_generated_files_already_exist( workflow_path = tmp_path / ".github/workflows/basic-memory.yml" config_path = tmp_path / ".github/basic-memory/config.yml" prompt_path = tmp_path / ".github/basic-memory/memory-ci-capture.md" + soul_path = tmp_path / ".github/basic-memory/SOUL.md" workflow_path.parent.mkdir(parents=True) config_path.parent.mkdir(parents=True) workflow_path.write_text("custom workflow\n", encoding="utf-8") config_path.write_text("project: existing\n", encoding="utf-8") 
prompt_path.write_text("custom prompt\n", encoding="utf-8") + soul_path.write_text("custom soul\n", encoding="utf-8") result = runner.invoke( cli_app, @@ -174,6 +180,7 @@ def test_setup_refreshes_schema_notes_when_generated_files_already_exist( assert workflow_path.read_text(encoding="utf-8") == "custom workflow\n" assert config_path.read_text(encoding="utf-8") == "project: existing\n" assert prompt_path.read_text(encoding="utf-8") == "custom prompt\n" + assert soul_path.read_text(encoding="utf-8") == "custom soul\n" mock_seed.assert_awaited_once_with( project="team-memory", project_id=None,