-
Notifications
You must be signed in to change notification settings - Fork 889
fix(vault): accept and persist content on POST/PUT vault/documents #1174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fc105d3
5d5be0a
1fad4e0
9edcc00
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,149 @@ | ||
| package http | ||
|
|
||
| import ( | ||
| "errors" | ||
| "log/slog" | ||
| "net/http" | ||
| "os" | ||
| "path/filepath" | ||
| "strings" | ||
| "time" | ||
|
|
||
| "github.com/google/uuid" | ||
| "github.com/nextlevelbuilder/goclaw/internal/eventbus" | ||
| "github.com/nextlevelbuilder/goclaw/internal/store" | ||
| "github.com/nextlevelbuilder/goclaw/internal/vault" | ||
| ) | ||
|
|
||
| // writeDocumentContent writes the given content bytes to a tenant-workspace- | ||
| // relative path, after validating the path stays inside the workspace and uses | ||
| // an allowed extension. Returns the SHA-256 content hash on success. | ||
| // | ||
| // This is the shared writer used by handleCreateDocument and handleUpdateDocument | ||
| // to materialise inline JSON `content` payloads on disk, so the enrichment | ||
| // pipeline (which reads files by path) can later compute summaries, embeddings | ||
| // and links. | ||
| func (h *VaultHandler) writeDocumentContent(workspace, relPath string, content []byte) (string, error) { | ||
| // Ensure the tenant workspace exists before resolving symlinks. Non-master | ||
| // tenants live under workspace/tenants/{slug}/ which handleUpload creates on | ||
| // demand (see vault_handler_upload.go) — without this, the very first JSON | ||
| // content write into a fresh tenant would fail EvalSymlinks with ErrNotExist | ||
| // and 500 the request. | ||
| if err := os.MkdirAll(workspace, 0o755); err != nil { | ||
| return "", err | ||
| } | ||
|
|
||
| // Resolve the workspace root through any symlinks so all containment checks | ||
| // compare canonical paths. | ||
| realWS, err := filepath.EvalSymlinks(workspace) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| target := filepath.Join(realWS, filepath.Clean(relPath)) | ||
|
|
||
| // Lexical containment: blocks "../" and absolute-path escapes. | ||
| if target != realWS && !strings.HasPrefix(target, realWS+string(os.PathSeparator)) { | ||
| slog.Warn("security.vault_symlink_escape", | ||
| "site", "path_traversal", "attempted", target, "workspace", realWS) | ||
| return "", os.ErrInvalid | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Important (I3). This (and lines 49, 69) reject symlink/path-escape attempts by returning |
||
| } | ||
|
|
||
| // Symlink containment: a lexical prefix check is not enough — a symlink that | ||
| // lives *inside* the workspace but points outside it (e.g. workspace/link -> | ||
| // /etc) would otherwise let the write follow it and clobber a file beyond | ||
| // the workspace. Resolve the deepest already-existing ancestor of the target | ||
| // and confirm it still canonicalises to a path inside the workspace, before | ||
| // any directory is created or byte written. | ||
| for ancestor := filepath.Dir(target); ; { | ||
| resolved, rerr := filepath.EvalSymlinks(ancestor) | ||
| if rerr == nil { | ||
| if resolved != realWS && !strings.HasPrefix(resolved, realWS+string(os.PathSeparator)) { | ||
| slog.Warn("security.vault_symlink_escape", | ||
| "site", "ancestor", "resolved", resolved, "workspace", realWS) | ||
| return "", os.ErrInvalid | ||
| } | ||
| break | ||
| } | ||
| if !os.IsNotExist(rerr) { | ||
| return "", rerr | ||
| } | ||
| parent := filepath.Dir(ancestor) | ||
| if parent == ancestor { | ||
| break // walked to the filesystem root without an existing ancestor | ||
| } | ||
| ancestor = parent | ||
| } | ||
|
|
||
| if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil { | ||
| return "", err | ||
| } | ||
|
|
||
| // Fast-fail when the final component is already a symlink — gives a clean | ||
| // rejection + security log without parsing OS-specific error codes. The | ||
| // O_NOFOLLOW open below closes the TOCTOU window this Lstat leaves open | ||
| // (an attacker could swap the file for a symlink between calls). | ||
| if fi, lerr := os.Lstat(target); lerr == nil && fi.Mode()&os.ModeSymlink != 0 { | ||
| slog.Warn("security.vault_symlink_escape", | ||
| "site", "final_lstat", "target", target, "workspace", realWS) | ||
| return "", os.ErrInvalid | ||
| } | ||
|
|
||
| // O_NOFOLLOW makes the kernel refuse to follow a symlink at the final | ||
| // component atomically with the open, removing the Lstat→Open race entirely | ||
| // on Unix. On Windows oNoFollow == 0 and the Lstat above is best-effort. | ||
| f, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC|oNoFollow, 0o644) | ||
| if err != nil { | ||
| if isSymlinkLoopErr(err) { | ||
| slog.Warn("security.vault_symlink_escape", | ||
| "site", "open_nofollow", "target", target, "workspace", realWS) | ||
| return "", os.ErrInvalid | ||
| } | ||
| return "", err | ||
| } | ||
| if _, werr := f.Write(content); werr != nil { | ||
| _ = f.Close() // best-effort cleanup; the write error is the real one | ||
| return "", werr | ||
| } | ||
| // Explicit Close so a delayed FS error (e.g. disk-full at flush) surfaces | ||
| // instead of being silently swallowed by a deferred close. | ||
| if cerr := f.Close(); cerr != nil { | ||
| return "", cerr | ||
| } | ||
| return vault.ContentHash(content), nil | ||
| } | ||
|
|
||
| // publishDocUpserted enqueues the standard EventVaultDocUpserted so the | ||
| // enrichment worker re-runs summary/embedding/link extraction for this doc. | ||
| // No-op when the event bus is not wired (some test setups). | ||
| func (h *VaultHandler) publishDocUpserted(tenantID uuid.UUID, agentID, docID, relPath, hash, workspace string) { | ||
| if h.eventBus == nil { | ||
| return | ||
| } | ||
| tenantIDStr := tenantID.String() | ||
| // Start enrichment progress before publishing so the worker pool can't drain | ||
| // the event and finish before the progress tracker registers it — same | ||
| // ordering handleUpload relies on to avoid that race. | ||
| if h.enrichProgress != nil { | ||
| h.enrichProgress.Start(1, tenantID) | ||
| } | ||
| h.eventBus.Publish(eventbus.DomainEvent{ | ||
| ID: uuid.Must(uuid.NewV7()).String(), | ||
| Type: eventbus.EventVaultDocUpserted, | ||
| SourceID: docID + ":" + hash, | ||
| TenantID: tenantIDStr, | ||
| AgentID: agentID, | ||
| Timestamp: time.Now(), | ||
| Payload: eventbus.VaultDocUpsertedPayload{ | ||
| DocID: docID, | ||
| TenantID: tenantIDStr, | ||
| AgentID: agentID, | ||
| Path: relPath, | ||
| ContentHash: hash, | ||
| Workspace: workspace, | ||
| }, | ||
| }) | ||
| } | ||
|
|
||
| // handleListAllDocuments lists vault documents across all agents in tenant. | ||
| // Optional query param agent_id to filter by specific agent. | ||
| func (h *VaultHandler) handleListAllDocuments(w http.ResponseWriter, r *http.Request) { | ||
|
|
@@ -99,6 +235,16 @@ func (h *VaultHandler) handleGetDocument(w http.ResponseWriter, r *http.Request) | |
| } | ||
|
|
||
| // handleCreateDocument creates a new vault document. | ||
| // | ||
| // `content` semantics — symmetric with handleUpdateDocument: | ||
| // - field omitted (nil pointer): metadata-only stub, no file write, no event | ||
| // (typically followed by a rescan that materialises the file from disk). | ||
| // - field present and non-empty: bytes materialised at <tenant-workspace>/<path>, | ||
| // SHA-256 hash stored on the row, EventVaultDocUpserted emitted so the | ||
| // enrichment worker computes summary/embeddings/links — same code path the | ||
| // multipart /v1/vault/upload endpoint and the filesystem rescan use. | ||
| // - field present and empty (""): a 0-byte file is written and the event still | ||
| // fires. Same behaviour as PUT — pick the right shape on the client side. | ||
| func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Request) { | ||
| locale := extractLocale(r) | ||
| tenantID := store.TenantIDFromContext(r.Context()) | ||
|
|
@@ -107,6 +253,7 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque | |
| var body struct { | ||
| Path string `json:"path"` | ||
| Title string `json:"title"` | ||
| Content *string `json:"content"` // nil=no write; ""=write empty file; "data"=write bytes (mirrors PUT) | ||
| DocType string `json:"doc_type"` | ||
| Scope string `json:"scope"` | ||
| TeamID string `json:"team_id"` | ||
|
|
@@ -137,6 +284,16 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque | |
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid scope"}) | ||
| return | ||
| } | ||
| // Validate extension upfront if caller wants to write content — matches the | ||
| // whitelist enforced by /v1/vault/upload so the two endpoints accept the | ||
| // same file types. nil pointer = "no content field", which skips the check. | ||
| if body.Content != nil { | ||
| ext := strings.ToLower(filepath.Ext(body.Path)) | ||
| if !allowedUploadExts[ext] { | ||
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file type: " + ext}) | ||
| return | ||
| } | ||
| } | ||
|
|
||
| doc := &store.VaultDocument{ | ||
| TenantID: tenantID.String(), | ||
|
|
@@ -160,11 +317,50 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque | |
| doc.Scope = "team" | ||
| } | ||
| } | ||
|
|
||
| // Persist file content if provided, before the DB upsert so the | ||
| // content_hash column is set in a single write. | ||
| var ( | ||
| wsPath string | ||
| writtenHash string | ||
| contentWasSet bool | ||
| ) | ||
| if body.Content != nil { | ||
| wsPath = h.resolveTenantWorkspace(r.Context()) | ||
| if wsPath == "" { | ||
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "workspace not available"}) | ||
| return | ||
| } | ||
| hash, werr := h.writeDocumentContent(wsPath, body.Path, []byte(*body.Content)) | ||
| if werr != nil { | ||
| slog.Warn("vault.create: write content failed", "path", body.Path, "error", werr) | ||
| if errors.Is(werr, os.ErrInvalid) { | ||
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "path escapes workspace"}) | ||
| return | ||
| } | ||
| writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to write content"}) | ||
| return | ||
| } | ||
| doc.ContentHash = hash | ||
| writtenHash = hash | ||
| contentWasSet = true | ||
| } | ||
|
|
||
| if err := h.store.UpsertDocument(r.Context(), doc); err != nil { | ||
| slog.Warn("vault.create failed", "error", err) | ||
| writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) | ||
| return | ||
| } | ||
|
|
||
| // Fire enrichment after the DB row exists so the worker can locate the doc. | ||
| if contentWasSet { | ||
| evAgent := "" | ||
| if doc.AgentID != nil { | ||
| evAgent = *doc.AgentID | ||
| } | ||
| h.publishDocUpserted(tenantID, evAgent, doc.ID, body.Path, writtenHash, wsPath) | ||
| } | ||
|
|
||
| // Re-fetch by ID (set via RETURNING) — unambiguous even when same path exists across teams. | ||
| created, _ := h.store.GetDocumentByID(r.Context(), tenantID.String(), doc.ID) | ||
| if created != nil { | ||
|
|
@@ -189,6 +385,7 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque | |
|
|
||
| var body struct { | ||
| Title *string `json:"title"` | ||
| Content *string `json:"content"` // nil=no change; "" clears the file; non-empty rewrites it | ||
| DocType *string `json:"doc_type"` | ||
| Scope *string `json:"scope"` | ||
| TeamID *string `json:"team_id"` // nil=no change, ""=clear, "uuid"=set | ||
|
|
@@ -225,11 +422,52 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque | |
| existing.Metadata = body.Metadata | ||
| } | ||
|
|
||
| // Rewrite content on disk when caller supplied a `content` field. | ||
| var ( | ||
| wsPath string | ||
| writtenHash string | ||
| contentWasSet bool | ||
| ) | ||
| if body.Content != nil { | ||
| ext := strings.ToLower(filepath.Ext(existing.Path)) | ||
| if !allowedUploadExts[ext] { | ||
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file type: " + ext}) | ||
| return | ||
| } | ||
| wsPath = h.resolveTenantWorkspace(r.Context()) | ||
| if wsPath == "" { | ||
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "workspace not available"}) | ||
| return | ||
| } | ||
| hash, werr := h.writeDocumentContent(wsPath, existing.Path, []byte(*body.Content)) | ||
| if werr != nil { | ||
| slog.Warn("vault.update: write content failed", "path", existing.Path, "error", werr) | ||
| if errors.Is(werr, os.ErrInvalid) { | ||
| writeJSON(w, http.StatusBadRequest, map[string]string{"error": "path escapes workspace"}) | ||
| return | ||
| } | ||
| writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to write content"}) | ||
| return | ||
| } | ||
| existing.ContentHash = hash | ||
| writtenHash = hash | ||
| contentWasSet = true | ||
| } | ||
|
|
||
| if err := h.store.UpsertDocument(r.Context(), existing); err != nil { | ||
| slog.Warn("vault.update failed", "error", err) | ||
| writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) | ||
| return | ||
| } | ||
|
|
||
| if contentWasSet { | ||
| evAgent := "" | ||
| if existing.AgentID != nil { | ||
| evAgent = *existing.AgentID | ||
| } | ||
| h.publishDocUpserted(tenantID, evAgent, existing.ID, existing.Path, writtenHash, wsPath) | ||
| } | ||
|
|
||
| updated, _ := h.store.GetDocumentByID(r.Context(), tenantID.String(), docID) | ||
| if updated != nil { | ||
| writeJSON(w, http.StatusOK, updated) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| //go:build !windows | ||
|
|
||
| package http | ||
|
|
||
| import ( | ||
| "errors" | ||
| "syscall" | ||
| ) | ||
|
|
||
| // oNoFollow is OR'd into the open flags so the kernel refuses to follow a | ||
| // symlink at the final path component — closes the Lstat→Open TOCTOU window | ||
| // where a tenant could swap a regular file for a symlink between the two | ||
| // syscalls. On Windows this is a no-op (see the _windows.go counterpart). | ||
| const oNoFollow = syscall.O_NOFOLLOW | ||
|
|
||
| // isSymlinkLoopErr reports whether err is the kernel's "refused to follow | ||
| // symlink" signal from an O_NOFOLLOW open (ELOOP on Linux/macOS). | ||
| func isSymlinkLoopErr(err error) bool { | ||
| return errors.Is(err, syscall.ELOOP) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| //go:build windows | ||
|
|
||
| package http | ||
|
|
||
| // Windows has no O_NOFOLLOW concept and follows reparse points transparently; | ||
| // the Lstat short-circuit in writeDocumentContent is the only protection | ||
| // available there. Desktop edition is single-user with no untrusted tenants, | ||
| // so this degradation is acceptable. | ||
| const oNoFollow = 0 | ||
|
|
||
| func isSymlinkLoopErr(err error) bool { return false } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Blocking (B1).
EvalSymlinksreturnsos.ErrNotExistfor a path that doesn't exist yet, and for non-master tenantsworkspacehere isworkspace/tenants/{slug}/which is only created on demand byhandleUpload(vault_handler_upload.go:109doesos.MkdirAll(...)before writing). On a fresh non-master tenant whose first vault interaction is this endpoint, the call 500s withfailed to write content.Suggested fix: either
os.MkdirAll(workspace, 0o755)beforeEvalSymlinks, or fall through to lexical containment onerrors.Is(err, os.ErrNotExist). Either way, please add a regression test with a non-master tenant UUID — current tests all usemasterTenantwhereTenantWorkspacereturns workspace unchanged, so this path isn't exercised.