Skip to content

Commit 5f3329f

Browse files
fix: harden mcp and cloud recovery paths
1 parent e092095 commit 5f3329f

6 files changed

Lines changed: 297 additions & 9 deletions

File tree

cmd/engram/main.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,10 +482,53 @@ func preflightCloudSync(s *store.Store, cfg store.Config, project string, mutate
482482
}
483483
return nil, fmt.Errorf("cloud sync blocked_unenrolled: %s", message)
484484
}
485+
if err := preflightCloudSyncLegacyMutations(s, project, targetKey, mutateState); err != nil {
486+
return nil, err
487+
}
485488
}
486489
return cc, nil
487490
}
488491

492+
func preflightCloudSyncLegacyMutations(s *store.Store, project, targetKey string, mutateState bool) error {
493+
report, err := s.DiagnoseCloudUpgradeLegacyMutations(project)
494+
if err != nil {
495+
return fmt.Errorf("cloud sync legacy mutation preflight: %w", err)
496+
}
497+
if report.BlockedCount == 0 && report.RepairableCount == 0 {
498+
return nil
499+
}
500+
501+
reasonCode := store.UpgradeReasonRepairableLegacyMutationPayload
502+
message := fmt.Sprintf(
503+
"legacy mutation payloads require repair before cloud sync for project %q: run `engram cloud upgrade doctor --project %s` then `engram cloud upgrade repair --project %s --apply`",
504+
project, project, project,
505+
)
506+
if report.BlockedCount > 0 {
507+
reasonCode = store.UpgradeReasonBlockedLegacyMutationManual
508+
first := firstBlockedLegacyMutationFinding(report)
509+
message = fmt.Sprintf(
510+
"legacy mutation payloads require manual action before cloud sync for project %q (seq=%d entity=%s op=%s): %s; inspect with `engram cloud upgrade doctor --project %s` and run `engram cloud upgrade repair --project %s --apply` for deterministic repairs",
511+
project, first.Seq, first.Entity, first.Op, first.Message, project, project,
512+
)
513+
}
514+
if mutateState {
515+
_ = s.MarkSyncBlocked(targetKey, reasonCode, message)
516+
}
517+
return fmt.Errorf("cloud sync %s: %s", reasonCode, message)
518+
}
519+
520+
func firstBlockedLegacyMutationFinding(report store.CloudUpgradeLegacyMutationReport) store.CloudUpgradeLegacyMutationFinding {
521+
for _, finding := range report.Findings {
522+
if !finding.Repairable {
523+
return finding
524+
}
525+
}
526+
if len(report.Findings) > 0 {
527+
return report.Findings[0]
528+
}
529+
return store.CloudUpgradeLegacyMutationFinding{}
530+
}
531+
489532
func cloudTargetKeyForProject(project string) string {
490533
project = strings.TrimSpace(project)
491534
if project == "" {

cmd/engram/main_extra_test.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,84 @@ func TestCmdCloudUpgradeDoctorRequiresProjectAndIsDeterministic(t *testing.T) {
861861
})
862862
}
863863

864+
func TestCmdSyncCloudPreflightsLegacyMutationPayloads(t *testing.T) {
865+
stubExitWithPanic(t)
866+
stubRuntimeHooks(t)
867+
868+
cfg := testConfig(t)
869+
if err := saveCloudConfig(cfg, &cloudConfig{ServerURL: "https://cloud.example.test"}); err != nil {
870+
t.Fatalf("save cloud config: %v", err)
871+
}
872+
873+
s, err := store.New(cfg)
874+
if err != nil {
875+
t.Fatalf("open store: %v", err)
876+
}
877+
if err := s.CreateSession("sync-legacy-s1", "sync-legacy", "/tmp/sync-legacy"); err != nil {
878+
_ = s.Close()
879+
t.Fatalf("create session: %v", err)
880+
}
881+
if _, err := s.AddObservation(store.AddObservationParams{SessionID: "sync-legacy-s1", Type: "decision", Title: "Canonical title", Content: "Canonical content", Project: "sync-legacy", Scope: "project"}); err != nil {
882+
_ = s.Close()
883+
t.Fatalf("add observation: %v", err)
884+
}
885+
if err := s.EnrollProject("sync-legacy"); err != nil {
886+
_ = s.Close()
887+
t.Fatalf("enroll project: %v", err)
888+
}
889+
_ = s.Close()
890+
891+
db, err := sql.Open("sqlite", filepath.Join(cfg.DataDir, "engram.db"))
892+
if err != nil {
893+
t.Fatalf("open raw db: %v", err)
894+
}
895+
defer db.Close()
896+
var syncID string
897+
if err := db.QueryRow(`SELECT sync_id FROM observations WHERE session_id = ? ORDER BY id DESC LIMIT 1`, "sync-legacy-s1").Scan(&syncID); err != nil {
898+
t.Fatalf("lookup sync id: %v", err)
899+
}
900+
legacyPayload := `{"sync_id":"` + syncID + `","session_id":"sync-legacy-s1","type":"decision","content":"legacy payload missing title","scope":"project"}`
901+
if _, err := db.Exec(
902+
`INSERT INTO sync_mutations (target_key, entity, entity_key, op, payload, source, project) VALUES (?, ?, ?, ?, ?, ?, ?)`,
903+
store.DefaultSyncTargetKey,
904+
store.SyncEntityObservation,
905+
syncID,
906+
store.SyncOpUpsert,
907+
legacyPayload,
908+
store.SyncSourceLocal,
909+
"sync-legacy",
910+
); err != nil {
911+
t.Fatalf("insert legacy mutation: %v", err)
912+
}
913+
914+
exportCalled := false
915+
oldSyncExport := syncExport
916+
syncExport = func(_ *engramsync.Syncer, _, _ string) (*engramsync.SyncResult, error) {
917+
exportCalled = true
918+
return &engramsync.SyncResult{}, nil
919+
}
920+
t.Cleanup(func() { syncExport = oldSyncExport })
921+
922+
withArgs(t, "engram", "sync", "--cloud", "--project", "sync-legacy")
923+
_, stderr, recovered := captureOutputAndRecover(t, func() { cmdSync(cfg) })
924+
if _, ok := recovered.(exitCode); !ok {
925+
t.Fatalf("expected cloud sync preflight to fail loudly, got %v", recovered)
926+
}
927+
if exportCalled {
928+
t.Fatal("cloud sync must block before export/canonicalization")
929+
}
930+
if !strings.Contains(stderr, "legacy mutation payloads require repair before cloud sync") ||
931+
!strings.Contains(stderr, "engram cloud upgrade doctor --project sync-legacy") ||
932+
!strings.Contains(stderr, "engram cloud upgrade repair --project sync-legacy --apply") {
933+
t.Fatalf("expected actionable legacy mutation guidance, got %q", stderr)
934+
}
935+
936+
var persistedPayload string
937+
if err := db.QueryRow(`SELECT payload FROM sync_mutations WHERE project = ? AND payload = ?`, "sync-legacy", legacyPayload).Scan(&persistedPayload); err != nil {
938+
t.Fatalf("expected sync preflight not to auto-repair payload: %v", err)
939+
}
940+
}
941+
864942
func TestCmdCloudUpgradeBootstrapStatusAndRollbackSemantics(t *testing.T) {
865943
stubExitWithPanic(t)
866944
stubRuntimeHooks(t)

internal/mcp/mcp.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,9 +1417,10 @@ func handleSessionStart(s *store.Store, cfg MCPConfig, activity *SessionActivity
14171417
return func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
14181418
id, _ := req.GetArguments()["id"].(string)
14191419
directory, _ := req.GetArguments()["directory"].(string)
1420+
explicitDirectory := strings.TrimSpace(directory)
14201421
// project field intentionally not read — auto-detect only (REQ-308)
14211422

1422-
detRes, err := resolveWriteProject()
1423+
detRes, err := resolveSessionStartProject(explicitDirectory)
14231424
if err != nil {
14241425
// JW1: use AvailableProjects from detection result (repos in cwd).
14251426
return errorWithMeta("ambiguous_project",
@@ -1430,7 +1431,7 @@ func handleSessionStart(s *store.Store, cfg MCPConfig, activity *SessionActivity
14301431
project, _ := store.NormalizeProject(detRes.Project)
14311432

14321433
activity.RecordToolCall(defaultSessionID(project))
1433-
if strings.TrimSpace(directory) == "" {
1434+
if explicitDirectory == "" {
14341435
directory = strings.TrimSpace(detRes.Path)
14351436
if directory == "" {
14361437
directory = currentWorkingDirectory()
@@ -1446,6 +1447,20 @@ func handleSessionStart(s *store.Store, cfg MCPConfig, activity *SessionActivity
14461447
}
14471448
}
14481449

1450+
func resolveSessionStartProject(explicitDirectory string) (projectpkg.DetectionResult, error) {
1451+
if explicitDirectory == "" {
1452+
return resolveWriteProject()
1453+
}
1454+
res := projectpkg.DetectProjectFull(explicitDirectory)
1455+
if res.Error != nil {
1456+
return res, res.Error
1457+
}
1458+
if res.Source == projectpkg.SourceDirBasename {
1459+
return resolveWriteProject()
1460+
}
1461+
return res, nil
1462+
}
1463+
14491464
func handleSessionEnd(s *store.Store, cfg MCPConfig, activity *SessionActivity) server.ToolHandlerFunc {
14501465
return func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
14511466
id, _ := req.GetArguments()["id"].(string)

internal/mcp/mcp_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2668,6 +2668,56 @@ func TestSessionStartWithExplicitDirectoryPreservesDirectory(t *testing.T) {
26682668
assertSessionSyncMutationDirectory(t, s, "session-start-explicit", explicitDir)
26692669
}
26702670

2671+
func TestSessionStartWithExplicitDirectoryResolvesProjectFromDirectory(t *testing.T) {
2672+
s := newMCPTestStore(t)
2673+
2674+
workspace := t.TempDir()
2675+
rightRepo := filepath.Join(workspace, "right-repo")
2676+
wrongRepo := filepath.Join(workspace, "wrong-repo")
2677+
if err := os.MkdirAll(filepath.Join(rightRepo, "nested"), 0755); err != nil {
2678+
t.Fatalf("create right repo nested dir: %v", err)
2679+
}
2680+
if err := os.MkdirAll(wrongRepo, 0755); err != nil {
2681+
t.Fatalf("create wrong repo dir: %v", err)
2682+
}
2683+
initTestGitRepo(t, rightRepo)
2684+
initTestGitRepo(t, wrongRepo)
2685+
cmd := exec.Command("git", "-C", rightRepo, "remote", "add", "origin",
2686+
"git@github.com:user/explicit-session-project.git")
2687+
if out, err := cmd.CombinedOutput(); err != nil {
2688+
t.Fatalf("git remote add right repo: %v\n%s", err, out)
2689+
}
2690+
cmd = exec.Command("git", "-C", wrongRepo, "remote", "add", "origin",
2691+
"git@github.com:user/wrong-session-project.git")
2692+
if out, err := cmd.CombinedOutput(); err != nil {
2693+
t.Fatalf("git remote add wrong repo: %v\n%s", err, out)
2694+
}
2695+
t.Chdir(workspace)
2696+
2697+
explicitDir := filepath.Join(rightRepo, "nested")
2698+
start := handleSessionStart(s, MCPConfig{}, NewSessionActivity(10*time.Minute))
2699+
res, err := start(context.Background(), mcppkg.CallToolRequest{
2700+
Params: mcppkg.CallToolParams{Arguments: map[string]any{
2701+
"id": "session-start-explicit-project",
2702+
"directory": explicitDir,
2703+
}},
2704+
})
2705+
if err != nil || res.IsError {
2706+
t.Fatalf("session start: err=%v isError=%v text=%s", err, res.IsError, callResultText(t, res))
2707+
}
2708+
2709+
sess, err := s.GetSession("session-start-explicit-project")
2710+
if err != nil {
2711+
t.Fatalf("get session: %v", err)
2712+
}
2713+
if sess.Project != "explicit-session-project" {
2714+
t.Fatalf("expected explicit directory project, got %q", sess.Project)
2715+
}
2716+
if sess.Directory != explicitDir {
2717+
t.Fatalf("expected persisted directory=%q, got %q", explicitDir, sess.Directory)
2718+
}
2719+
}
2720+
26712721
// ─── Batch 4: Write handler schema + auto-detect ─────────────────────────────
26722722

26732723
// TestWriteSchema_NoProjectField asserts that the 6 write tools do NOT include

internal/store/store.go

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@ var openDB = sql.Open
3030
// See https://www.sqlite.org/rescode.html#constraint_foreignkey
3131
const sqliteConstraintForeignKey = 787
3232

33+
const (
34+
sqlitePrimaryBusy = 5
35+
sqlitePrimaryLocked = 6
36+
)
37+
38+
var sqliteWriteRetryBackoffs = []time.Duration{
39+
10 * time.Millisecond,
40+
25 * time.Millisecond,
41+
50 * time.Millisecond,
42+
}
43+
3344
// Sentinel errors returned by delete operations so callers can use errors.Is.
3445
var (
3546
ErrSessionNotFound = errors.New("session not found")
@@ -4164,15 +4175,46 @@ func (s *Store) PruneProject(project string) (*PruneResult, error) {
41644175
// ─── Helpers ─────────────────────────────────────────────────────────────────
41654176

41664177
func (s *Store) withTx(fn func(tx *sql.Tx) error) error {
4167-
tx, err := s.beginTxHook()
4168-
if err != nil {
4169-
return err
4178+
return withSQLiteWriteRetry(func() error {
4179+
tx, err := s.beginTxHook()
4180+
if err != nil {
4181+
return err
4182+
}
4183+
defer tx.Rollback()
4184+
if err := fn(tx); err != nil {
4185+
return err
4186+
}
4187+
return s.commitHook(tx)
4188+
})
4189+
}
4190+
4191+
func withSQLiteWriteRetry(fn func() error) error {
4192+
var lastErr error
4193+
for attempt := 0; attempt <= len(sqliteWriteRetryBackoffs); attempt++ {
4194+
if err := fn(); err != nil {
4195+
lastErr = err
4196+
if !isRetryableSQLiteLockError(err) || attempt == len(sqliteWriteRetryBackoffs) {
4197+
return err
4198+
}
4199+
time.Sleep(sqliteWriteRetryBackoffs[attempt])
4200+
continue
4201+
}
4202+
return nil
41704203
}
4171-
defer tx.Rollback()
4172-
if err := fn(tx); err != nil {
4173-
return err
4204+
return lastErr
4205+
}
4206+
4207+
func isRetryableSQLiteLockError(err error) bool {
4208+
if err == nil {
4209+
return false
41744210
}
4175-
return s.commitHook(tx)
4211+
var sqliteErr *sqlite.Error
4212+
if errors.As(err, &sqliteErr) {
4213+
primaryCode := sqliteErr.Code() & 0xff
4214+
return primaryCode == sqlitePrimaryBusy || primaryCode == sqlitePrimaryLocked
4215+
}
4216+
msg := strings.ToLower(err.Error())
4217+
return strings.Contains(msg, "database is locked") || strings.Contains(msg, "database is busy") || strings.Contains(msg, "sqlite_busy") || strings.Contains(msg, "sqlite_locked")
41764218
}
41774219

41784220
func (s *Store) createSessionTx(tx *sql.Tx, id, project, directory string) error {

internal/store/store_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3616,6 +3616,66 @@ func TestHookFallbacksAndAdditionalBranches(t *testing.T) {
36163616
})
36173617
}
36183618

3619+
func TestSQLiteWriteRetryRetriesTransientLockErrors(t *testing.T) {
3620+
oldBackoffs := sqliteWriteRetryBackoffs
3621+
sqliteWriteRetryBackoffs = []time.Duration{0, 0, 0}
3622+
t.Cleanup(func() { sqliteWriteRetryBackoffs = oldBackoffs })
3623+
3624+
t.Run("begin lock is retried and succeeds", func(t *testing.T) {
3625+
s := newTestStore(t)
3626+
origBegin := s.hooks.beginTx
3627+
attempts := 0
3628+
s.hooks.beginTx = func(db *sql.DB) (*sql.Tx, error) {
3629+
attempts++
3630+
if attempts < 3 {
3631+
return nil, errors.New("database is locked")
3632+
}
3633+
return origBegin(db)
3634+
}
3635+
3636+
if err := s.CreateSession("retry-session", "retry-project", "/tmp/retry-project"); err != nil {
3637+
t.Fatalf("expected retry to succeed, got %v", err)
3638+
}
3639+
if attempts != 3 {
3640+
t.Fatalf("expected 3 begin attempts, got %d", attempts)
3641+
}
3642+
})
3643+
3644+
t.Run("non lock error is not retried", func(t *testing.T) {
3645+
s := newTestStore(t)
3646+
attempts := 0
3647+
s.hooks.beginTx = func(_ *sql.DB) (*sql.Tx, error) {
3648+
attempts++
3649+
return nil, errors.New("permanent begin failure")
3650+
}
3651+
3652+
err := s.CreateSession("no-retry-session", "retry-project", "/tmp/retry-project")
3653+
if err == nil || !strings.Contains(err.Error(), "permanent begin failure") {
3654+
t.Fatalf("expected permanent error, got %v", err)
3655+
}
3656+
if attempts != 1 {
3657+
t.Fatalf("expected one attempt for permanent error, got %d", attempts)
3658+
}
3659+
})
3660+
3661+
t.Run("lock errors remain bounded", func(t *testing.T) {
3662+
s := newTestStore(t)
3663+
attempts := 0
3664+
s.hooks.beginTx = func(_ *sql.DB) (*sql.Tx, error) {
3665+
attempts++
3666+
return nil, errors.New("SQLITE_BUSY: database is locked")
3667+
}
3668+
3669+
err := s.CreateSession("bounded-session", "retry-project", "/tmp/retry-project")
3670+
if err == nil || !isRetryableSQLiteLockError(err) {
3671+
t.Fatalf("expected retryable lock error after exhaustion, got %v", err)
3672+
}
3673+
if attempts != len(sqliteWriteRetryBackoffs)+1 {
3674+
t.Fatalf("expected bounded attempts=%d, got %d", len(sqliteWriteRetryBackoffs)+1, attempts)
3675+
}
3676+
})
3677+
}
3678+
36193679
func TestStoreUncoveredBranchesPushToHundred(t *testing.T) {
36203680
t.Run("new open database hook error", func(t *testing.T) {
36213681
orig := openDB

0 commit comments

Comments
 (0)