Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions internal/http/vault_handler_documents.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,149 @@
package http

import (
"errors"
"log/slog"
"net/http"
"os"
"path/filepath"
"strings"
"time"

"github.com/google/uuid"
"github.com/nextlevelbuilder/goclaw/internal/eventbus"
"github.com/nextlevelbuilder/goclaw/internal/store"
"github.com/nextlevelbuilder/goclaw/internal/vault"
)

// writeDocumentContent writes the given content bytes to a tenant-workspace-
// relative path, after validating the path stays inside the workspace and uses
// an allowed extension. Returns the SHA-256 content hash on success.
//
// This is the shared writer used by handleCreateDocument and handleUpdateDocument
// to materialise inline JSON `content` payloads on disk, so the enrichment
// pipeline (which reads files by path) can later compute summaries, embeddings
// and links.
func (h *VaultHandler) writeDocumentContent(workspace, relPath string, content []byte) (string, error) {
// Ensure the tenant workspace exists before resolving symlinks. Non-master
// tenants live under workspace/tenants/{slug}/ which handleUpload creates on
// demand (see vault_handler_upload.go) — without this, the very first JSON
// content write into a fresh tenant would fail EvalSymlinks with ErrNotExist
// and 500 the request.
if err := os.MkdirAll(workspace, 0o755); err != nil {
return "", err
}

// Resolve the workspace root through any symlinks so all containment checks
// compare canonical paths.
realWS, err := filepath.EvalSymlinks(workspace)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Blocking (B1). EvalSymlinks returns os.ErrNotExist for a path that doesn't exist yet, and for non-master tenants workspace here is workspace/tenants/{slug}/ which is only created on demand by handleUpload (vault_handler_upload.go:109 does os.MkdirAll(...) before writing). On a fresh non-master tenant whose first vault interaction is this endpoint, the call 500s with failed to write content.

Suggested fix: either os.MkdirAll(workspace, 0o755) before EvalSymlinks, or fall through to lexical containment on errors.Is(err, os.ErrNotExist). Either way, please add a regression test with a non-master tenant UUID — current tests all use masterTenant where TenantWorkspace returns workspace unchanged, so this path isn't exercised.

if err != nil {
return "", err
}
target := filepath.Join(realWS, filepath.Clean(relPath))

// Lexical containment: blocks "../" and absolute-path escapes.
if target != realWS && !strings.HasPrefix(target, realWS+string(os.PathSeparator)) {
slog.Warn("security.vault_symlink_escape",
"site", "path_traversal", "attempted", target, "workspace", realWS)
return "", os.ErrInvalid
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Important (I3). This (and lines 49, 69) reject symlink/path-escape attempts by returning os.ErrInvalid, but the only resulting log line is the generic vault.create: write content failed at line 299 — oncall can't alert on attempted escapes. Compare with internal/http/storage.go:158 (slog.Warn("security.storage_symlink_escape", "resolved", ..., "base", ...)) and :175 — the codebase pattern is a security.* slog at the rejection site itself. Suggest adding slog.Warn("security.vault_symlink_escape", ...) with the resolved path + workspace at each of the three rejection points before returning.

}

// Symlink containment: a lexical prefix check is not enough — a symlink that
// lives *inside* the workspace but points outside it (e.g. workspace/link ->
// /etc) would otherwise let the write follow it and clobber a file beyond
// the workspace. Resolve the deepest already-existing ancestor of the target
// and confirm it still canonicalises to a path inside the workspace, before
// any directory is created or byte written.
for ancestor := filepath.Dir(target); ; {
resolved, rerr := filepath.EvalSymlinks(ancestor)
if rerr == nil {
if resolved != realWS && !strings.HasPrefix(resolved, realWS+string(os.PathSeparator)) {
slog.Warn("security.vault_symlink_escape",
"site", "ancestor", "resolved", resolved, "workspace", realWS)
return "", os.ErrInvalid
}
break
}
if !os.IsNotExist(rerr) {
return "", rerr
}
parent := filepath.Dir(ancestor)
if parent == ancestor {
break // walked to the filesystem root without an existing ancestor
}
ancestor = parent
}

if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
return "", err
}

// Fast-fail when the final component is already a symlink — gives a clean
// rejection + security log without parsing OS-specific error codes. The
// O_NOFOLLOW open below closes the TOCTOU window this Lstat leaves open
// (an attacker could swap the file for a symlink between calls).
if fi, lerr := os.Lstat(target); lerr == nil && fi.Mode()&os.ModeSymlink != 0 {
slog.Warn("security.vault_symlink_escape",
"site", "final_lstat", "target", target, "workspace", realWS)
return "", os.ErrInvalid
}

// O_NOFOLLOW makes the kernel refuse to follow a symlink at the final
// component atomically with the open, removing the Lstat→Open race entirely
// on Unix. On Windows oNoFollow == 0 and the Lstat above is best-effort.
f, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC|oNoFollow, 0o644)
if err != nil {
if isSymlinkLoopErr(err) {
slog.Warn("security.vault_symlink_escape",
"site", "open_nofollow", "target", target, "workspace", realWS)
return "", os.ErrInvalid
}
return "", err
}
if _, werr := f.Write(content); werr != nil {
_ = f.Close() // best-effort cleanup; the write error is the real one
return "", werr
}
// Explicit Close so a delayed FS error (e.g. disk-full at flush) surfaces
// instead of being silently swallowed by a deferred close.
if cerr := f.Close(); cerr != nil {
return "", cerr
}
return vault.ContentHash(content), nil
}

// publishDocUpserted enqueues the standard EventVaultDocUpserted so the
// enrichment worker re-runs summary/embedding/link extraction for this doc.
// No-op when the event bus is not wired (some test setups).
func (h *VaultHandler) publishDocUpserted(tenantID uuid.UUID, agentID, docID, relPath, hash, workspace string) {
if h.eventBus == nil {
return
}
tenantIDStr := tenantID.String()
// Start enrichment progress before publishing so the worker pool can't drain
// the event and finish before the progress tracker registers it — same
// ordering handleUpload relies on to avoid that race.
if h.enrichProgress != nil {
h.enrichProgress.Start(1, tenantID)
}
h.eventBus.Publish(eventbus.DomainEvent{
ID: uuid.Must(uuid.NewV7()).String(),
Type: eventbus.EventVaultDocUpserted,
SourceID: docID + ":" + hash,
TenantID: tenantIDStr,
AgentID: agentID,
Timestamp: time.Now(),
Payload: eventbus.VaultDocUpsertedPayload{
DocID: docID,
TenantID: tenantIDStr,
AgentID: agentID,
Path: relPath,
ContentHash: hash,
Workspace: workspace,
},
})
}

// handleListAllDocuments lists vault documents across all agents in tenant.
// Optional query param agent_id to filter by specific agent.
func (h *VaultHandler) handleListAllDocuments(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -99,6 +235,16 @@ func (h *VaultHandler) handleGetDocument(w http.ResponseWriter, r *http.Request)
}

// handleCreateDocument creates a new vault document.
//
// `content` semantics — symmetric with handleUpdateDocument:
// - field omitted (nil pointer): metadata-only stub, no file write, no event
// (typically followed by a rescan that materialises the file from disk).
// - field present and non-empty: bytes materialised at <tenant-workspace>/<path>,
// SHA-256 hash stored on the row, EventVaultDocUpserted emitted so the
// enrichment worker computes summary/embeddings/links — same code path the
// multipart /v1/vault/upload endpoint and the filesystem rescan use.
// - field present and empty (""): a 0-byte file is written and the event still
// fires. Same behaviour as PUT — pick the right shape on the client side.
func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Request) {
locale := extractLocale(r)
tenantID := store.TenantIDFromContext(r.Context())
Expand All @@ -107,6 +253,7 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
var body struct {
Path string `json:"path"`
Title string `json:"title"`
Content *string `json:"content"` // nil=no write; ""=write empty file; "data"=write bytes (mirrors PUT)
DocType string `json:"doc_type"`
Scope string `json:"scope"`
TeamID string `json:"team_id"`
Expand Down Expand Up @@ -137,6 +284,16 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid scope"})
return
}
// Validate extension upfront if caller wants to write content — matches the
// whitelist enforced by /v1/vault/upload so the two endpoints accept the
// same file types. nil pointer = "no content field", which skips the check.
if body.Content != nil {
ext := strings.ToLower(filepath.Ext(body.Path))
if !allowedUploadExts[ext] {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file type: " + ext})
return
}
}

doc := &store.VaultDocument{
TenantID: tenantID.String(),
Expand All @@ -160,11 +317,50 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
doc.Scope = "team"
}
}

// Persist file content if provided, before the DB upsert so the
// content_hash column is set in a single write.
var (
wsPath string
writtenHash string
contentWasSet bool
)
if body.Content != nil {
wsPath = h.resolveTenantWorkspace(r.Context())
if wsPath == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "workspace not available"})
return
}
hash, werr := h.writeDocumentContent(wsPath, body.Path, []byte(*body.Content))
if werr != nil {
slog.Warn("vault.create: write content failed", "path", body.Path, "error", werr)
if errors.Is(werr, os.ErrInvalid) {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "path escapes workspace"})
return
}
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to write content"})
return
}
doc.ContentHash = hash
writtenHash = hash
contentWasSet = true
}

if err := h.store.UpsertDocument(r.Context(), doc); err != nil {
slog.Warn("vault.create failed", "error", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}

// Fire enrichment after the DB row exists so the worker can locate the doc.
if contentWasSet {
evAgent := ""
if doc.AgentID != nil {
evAgent = *doc.AgentID
}
h.publishDocUpserted(tenantID, evAgent, doc.ID, body.Path, writtenHash, wsPath)
}

// Re-fetch by ID (set via RETURNING) — unambiguous even when same path exists across teams.
created, _ := h.store.GetDocumentByID(r.Context(), tenantID.String(), doc.ID)
if created != nil {
Expand All @@ -189,6 +385,7 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque

var body struct {
Title *string `json:"title"`
Content *string `json:"content"` // nil=no change; "" clears the file; non-empty rewrites it
DocType *string `json:"doc_type"`
Scope *string `json:"scope"`
TeamID *string `json:"team_id"` // nil=no change, ""=clear, "uuid"=set
Expand Down Expand Up @@ -225,11 +422,52 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque
existing.Metadata = body.Metadata
}

// Rewrite content on disk when caller supplied a `content` field.
var (
wsPath string
writtenHash string
contentWasSet bool
)
if body.Content != nil {
ext := strings.ToLower(filepath.Ext(existing.Path))
if !allowedUploadExts[ext] {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file type: " + ext})
return
}
wsPath = h.resolveTenantWorkspace(r.Context())
if wsPath == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "workspace not available"})
return
}
hash, werr := h.writeDocumentContent(wsPath, existing.Path, []byte(*body.Content))
if werr != nil {
slog.Warn("vault.update: write content failed", "path", existing.Path, "error", werr)
if errors.Is(werr, os.ErrInvalid) {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "path escapes workspace"})
return
}
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "failed to write content"})
return
}
existing.ContentHash = hash
writtenHash = hash
contentWasSet = true
}

if err := h.store.UpsertDocument(r.Context(), existing); err != nil {
slog.Warn("vault.update failed", "error", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}

if contentWasSet {
evAgent := ""
if existing.AgentID != nil {
evAgent = *existing.AgentID
}
h.publishDocUpserted(tenantID, evAgent, existing.ID, existing.Path, writtenHash, wsPath)
}

updated, _ := h.store.GetDocumentByID(r.Context(), tenantID.String(), docID)
if updated != nil {
writeJSON(w, http.StatusOK, updated)
Expand Down
20 changes: 20 additions & 0 deletions internal/http/vault_handler_documents_nofollow_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//go:build !windows

package http

import (
"errors"
"syscall"
)

// oNoFollow is OR'd into the open flags so the kernel refuses to follow a
// symlink at the final path component — closes the Lstat→Open TOCTOU window
// where a tenant could swap a regular file for a symlink between the two
// syscalls. On Windows this is a no-op (see the _windows.go counterpart).
const oNoFollow = syscall.O_NOFOLLOW

// isSymlinkLoopErr reports whether err is the kernel's "refused to follow
// symlink" signal from an O_NOFOLLOW open (ELOOP on Linux/macOS).
func isSymlinkLoopErr(err error) bool {
return errors.Is(err, syscall.ELOOP)
}
11 changes: 11 additions & 0 deletions internal/http/vault_handler_documents_nofollow_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:build windows

package http

// Windows has no O_NOFOLLOW concept and follows reparse points transparently;
// the Lstat short-circuit in writeDocumentContent is the only protection
// available there. Desktop edition is single-user with no untrusted tenants,
// so this degradation is acceptable.
const oNoFollow = 0

func isSymlinkLoopErr(err error) bool { return false }
Loading
Loading