Skip to content

Commit ba59e7b

Browse files
aaron-tsarclaude
andcommitted
fix(vault): non-master tenant 500, O_NOFOLLOW, slog, *string content
Addresses second review on #1174 (thieung): **B1 (blocking)** — first JSON content write into a fresh non-master tenant returned 500 because workspace/tenants/{slug}/ doesn't exist yet and EvalSymlinks fails with ErrNotExist. writeDocumentContent now MkdirAlls the workspace before resolving (mirrors handleUpload). Added a regression test that drives a non-master tenant context end-to-end and asserts the file lands under workspace/tenants/{slug}/. **I1** — replaced the Lstat→WriteFile sequence (TOCTOU window) with os.OpenFile(O_WRONLY|O_CREATE|O_TRUNC|O_NOFOLLOW). The Lstat short-fail stays as defense-in-depth + cleaner error logging. Portability handled via build-tagged constants in vault_handler_documents_nofollow_{unix, windows}.go — O_NOFOLLOW is no-op on Windows (desktop edition is single-user with no untrusted tenants). **I3** — every symlink/path-escape rejection now emits slog.Warn("security.vault_symlink_escape", ...) with the rejection site, resolved path and workspace, matching the storage.go pattern so oncall can alert on attempted escapes. **I4** — POST `content` field changed from `string` to `*string` to match PUT. Nil pointer = "no write" (metadata-only stub); empty string = "write a 0-byte file + fire event"; non-empty = write bytes. Updated handler godoc to spell out the symmetry. Switched `werr == os.ErrInvalid` to `errors.Is` per repo conventions. **Tests added** — non-master tenant first content write (B1), PUT with nil content (locks "no change" contract), handler-level ".." path rejection, POST content:"" writes empty file. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9349da8 commit ba59e7b

4 files changed

Lines changed: 219 additions & 19 deletions

internal/http/vault_handler_documents.go

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package http
22

33
import (
4+
"errors"
45
"log/slog"
56
"net/http"
67
"os"
@@ -23,6 +24,15 @@ import (
2324
// pipeline (which reads files by path) can later compute summaries, embeddings
2425
// and links.
2526
func (h *VaultHandler) writeDocumentContent(workspace, relPath string, content []byte) (string, error) {
27+
// Ensure the tenant workspace exists before resolving symlinks. Non-master
28+
// tenants live under workspace/tenants/{slug}/ which handleUpload creates on
29+
// demand (see vault_handler_upload.go) — without this, the very first JSON
30+
// content write into a fresh tenant would fail EvalSymlinks with ErrNotExist
31+
// and 500 the request.
32+
if err := os.MkdirAll(workspace, 0o755); err != nil {
33+
return "", err
34+
}
35+
2636
// Resolve the workspace root through any symlinks so all containment checks
2737
// compare canonical paths.
2838
realWS, err := filepath.EvalSymlinks(workspace)
@@ -33,19 +43,23 @@ func (h *VaultHandler) writeDocumentContent(workspace, relPath string, content [
3343

3444
// Lexical containment: blocks "../" and absolute-path escapes.
3545
if target != realWS && !strings.HasPrefix(target, realWS+string(os.PathSeparator)) {
46+
slog.Warn("security.vault_symlink_escape",
47+
"site", "lexical", "attempted", target, "workspace", realWS)
3648
return "", os.ErrInvalid
3749
}
3850

3951
// Symlink containment: a lexical prefix check is not enough — a symlink that
4052
// lives *inside* the workspace but points outside it (e.g. workspace/link ->
41-
// /etc) would otherwise let os.WriteFile follow it and clobber a file beyond
53+
// /etc) would otherwise let the write follow it and clobber a file beyond
4254
// the workspace. Resolve the deepest already-existing ancestor of the target
4355
// and confirm it still canonicalises to a path inside the workspace, before
4456
// any directory is created or byte written.
4557
for ancestor := filepath.Dir(target); ; {
4658
resolved, rerr := filepath.EvalSymlinks(ancestor)
4759
if rerr == nil {
4860
if resolved != realWS && !strings.HasPrefix(resolved, realWS+string(os.PathSeparator)) {
61+
slog.Warn("security.vault_symlink_escape",
62+
"site", "ancestor", "resolved", resolved, "workspace", realWS)
4963
return "", os.ErrInvalid
5064
}
5165
break
@@ -64,12 +78,30 @@ func (h *VaultHandler) writeDocumentContent(workspace, relPath string, content [
6478
return "", err
6579
}
6680

67-
// Refuse to follow a symlink planted at the final path component itself.
81+
// Fast-fail when the final component is already a symlink — gives a clean
82+
// rejection + security log without parsing OS-specific error codes. The
83+
// O_NOFOLLOW open below closes the TOCTOU window this Lstat leaves open
84+
// (an attacker could swap the file for a symlink between calls).
6885
if fi, lerr := os.Lstat(target); lerr == nil && fi.Mode()&os.ModeSymlink != 0 {
86+
slog.Warn("security.vault_symlink_escape",
87+
"site", "final_lstat", "target", target, "workspace", realWS)
6988
return "", os.ErrInvalid
7089
}
7190

72-
if err := os.WriteFile(target, content, 0o644); err != nil {
91+
// O_NOFOLLOW makes the kernel refuse to follow a symlink at the final
92+
// component atomically with the open, removing the Lstat→Open race entirely
93+
// on Unix. On Windows oNoFollow == 0 and the Lstat above is best-effort.
94+
f, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC|oNoFollow, 0o644)
95+
if err != nil {
96+
if isSymlinkLoopErr(err) {
97+
slog.Warn("security.vault_symlink_escape",
98+
"site", "open_nofollow", "target", target, "workspace", realWS)
99+
return "", os.ErrInvalid
100+
}
101+
return "", err
102+
}
103+
defer f.Close()
104+
if _, err := f.Write(content); err != nil {
73105
return "", err
74106
}
75107
return vault.ContentHash(content), nil
@@ -199,15 +231,15 @@ func (h *VaultHandler) handleGetDocument(w http.ResponseWriter, r *http.Request)
199231

200232
// handleCreateDocument creates a new vault document.
201233
//
202-
// When `content` is supplied in the JSON body, the bytes are materialised on
203-
// disk at <tenant-workspace>/<path> and the SHA-256 hash is stored on the
204-
// document record. An EventVaultDocUpserted is then emitted so the enrichment
205-
// worker computes summary/embeddings/links — same code path the multipart
206-
// /v1/vault/upload endpoint and the filesystem rescan use.
207-
//
208-
// Omitting `content` preserves the historical behaviour of registering a
209-
// metadata-only stub (typically followed by a rescan that materialises the
210-
// file from disk).
234+
// `content` semantics — symmetric with handleUpdateDocument:
235+
// - field omitted (nil pointer): metadata-only stub, no file write, no event
236+
// (typically followed by a rescan that materialises the file from disk).
237+
// - field present and non-empty: bytes materialised at <tenant-workspace>/<path>,
238+
// SHA-256 hash stored on the row, EventVaultDocUpserted emitted so the
239+
// enrichment worker computes summary/embeddings/links — same code path the
240+
// multipart /v1/vault/upload endpoint and the filesystem rescan use.
241+
// - field present and empty (""): a 0-byte file is written and the event still
242+
// fires. Same behaviour as PUT — pick the right shape on the client side.
211243
func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Request) {
212244
locale := extractLocale(r)
213245
tenantID := store.TenantIDFromContext(r.Context())
@@ -216,7 +248,7 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
216248
var body struct {
217249
Path string `json:"path"`
218250
Title string `json:"title"`
219-
Content string `json:"content"` // optional; when set, written to disk
251+
Content *string `json:"content"` // nil=no write; ""=write empty file; "data"=write bytes (mirrors PUT)
220252
DocType string `json:"doc_type"`
221253
Scope string `json:"scope"`
222254
TeamID string `json:"team_id"`
@@ -249,8 +281,8 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
249281
}
250282
// Validate extension upfront if caller wants to write content — matches the
251283
// whitelist enforced by /v1/vault/upload so the two endpoints accept the
252-
// same file types.
253-
if body.Content != "" {
284+
// same file types. nil pointer = "no content field", which skips the check.
285+
if body.Content != nil {
254286
ext := strings.ToLower(filepath.Ext(body.Path))
255287
if !allowedUploadExts[ext] {
256288
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "unsupported file type: " + ext})
@@ -288,16 +320,16 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
288320
writtenHash string
289321
contentWasSet bool
290322
)
291-
if body.Content != "" {
323+
if body.Content != nil {
292324
wsPath = h.resolveTenantWorkspace(r.Context())
293325
if wsPath == "" {
294326
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "workspace not available"})
295327
return
296328
}
297-
hash, werr := h.writeDocumentContent(wsPath, body.Path, []byte(body.Content))
329+
hash, werr := h.writeDocumentContent(wsPath, body.Path, []byte(*body.Content))
298330
if werr != nil {
299331
slog.Warn("vault.create: write content failed", "path", body.Path, "error", werr)
300-
if werr == os.ErrInvalid {
332+
if errors.Is(werr, os.ErrInvalid) {
301333
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "path escapes workspace"})
302334
return
303335
}
@@ -405,7 +437,7 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque
405437
hash, werr := h.writeDocumentContent(wsPath, existing.Path, []byte(*body.Content))
406438
if werr != nil {
407439
slog.Warn("vault.update: write content failed", "path", existing.Path, "error", werr)
408-
if werr == os.ErrInvalid {
440+
if errors.Is(werr, os.ErrInvalid) {
409441
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "path escapes workspace"})
410442
return
411443
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//go:build !windows
2+
3+
package http
4+
5+
import (
6+
"errors"
7+
"syscall"
8+
)
9+
10+
// oNoFollow is OR'd into the open flags so the kernel refuses to follow a
11+
// symlink at the final path component — closes the Lstat→Open TOCTOU window
12+
// where a tenant could swap a regular file for a symlink between the two
13+
// syscalls. On Windows this is a no-op (see the _windows.go counterpart).
14+
const oNoFollow = syscall.O_NOFOLLOW
15+
16+
// isSymlinkLoopErr reports whether err is the kernel's "refused to follow
17+
// symlink" signal from an O_NOFOLLOW open (ELOOP on Linux/macOS).
18+
func isSymlinkLoopErr(err error) bool {
19+
return errors.Is(err, syscall.ELOOP)
20+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
//go:build windows
2+
3+
package http
4+
5+
// Windows has no O_NOFOLLOW concept and follows reparse points transparently;
6+
// the Lstat short-circuit in writeDocumentContent is the only protection
7+
// available there. Desktop edition is single-user with no untrusted tenants,
8+
// so this degradation is acceptable.
9+
const oNoFollow = 0
10+
11+
func isSymlinkLoopErr(err error) bool { return false }

internal/http/vault_handler_documents_test.go

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,140 @@ func TestHandleUpdateDocument_EmptyContentClearsFile(t *testing.T) {
339339
t.Errorf("published %d events, want 1", len(bus.published))
340340
}
341341
}
342+
343+
// Locks the "no change" contract on PUT: omitting `content` (nil pointer in
344+
// the body struct) must not touch the disk or fire an enrichment event, even
345+
// when other metadata fields are present.
346+
func TestHandleUpdateDocument_NilContentSkipsWriteAndEvent(t *testing.T) {
347+
st := newFakeVaultStore()
348+
st.docs["existing-2"] = &store.VaultDocument{
349+
ID: "existing-2",
350+
TenantID: masterTenant.String(),
351+
Path: "notes/keep.md",
352+
Title: "Old",
353+
DocType: "note",
354+
Scope: "shared",
355+
ContentHash: "preexisting-hash",
356+
}
357+
bus := &fakeEventBus{store: st}
358+
ws := t.TempDir()
359+
h := &VaultHandler{store: st, eventBus: bus, workspace: ws}
360+
361+
// content field intentionally omitted → *string is nil → no write, no event.
362+
req := newJSONRequest(t, http.MethodPut, "/v1/vault/documents/existing-2", map[string]any{
363+
"title": "Renamed but content untouched",
364+
})
365+
req.SetPathValue("docID", "existing-2")
366+
rr := httptest.NewRecorder()
367+
368+
h.handleUpdateDocument(rr, req)
369+
370+
if rr.Code != http.StatusOK {
371+
t.Fatalf("status = %d, want 200 (body: %s)", rr.Code, rr.Body.String())
372+
}
373+
if _, statErr := os.Stat(filepath.Join(ws, "notes", "keep.md")); statErr == nil {
374+
t.Error("nil content must not materialise a file")
375+
}
376+
if st.docs["existing-2"].ContentHash != "preexisting-hash" {
377+
t.Errorf("content_hash = %q, want unchanged 'preexisting-hash'", st.docs["existing-2"].ContentHash)
378+
}
379+
if len(bus.published) != 0 {
380+
t.Errorf("nil-content PUT must not publish events, got %d", len(bus.published))
381+
}
382+
}
383+
384+
// Handler-level guard at vault_handler_documents.go path validation must reject
385+
// "..", short-circuiting before writeDocumentContent even runs.
386+
func TestHandleCreateDocument_RejectsParentTraversalPath(t *testing.T) {
387+
h := &VaultHandler{} // path check runs before any store/workspace access
388+
req := newJSONRequest(t, http.MethodPost, "/v1/vault/documents", map[string]any{
389+
"path": "../../etc/passwd.md",
390+
"title": "x",
391+
"content": "evil",
392+
})
393+
rr := httptest.NewRecorder()
394+
395+
h.handleCreateDocument(rr, req)
396+
397+
assertBadRequest(t, rr, "invalid path")
398+
}
399+
400+
// Mirrors PUT semantics on POST: `content: ""` writes a 0-byte file and
401+
// fires the enrichment event (the I4 fix that aligned POST/PUT).
402+
func TestHandleCreateDocument_EmptyContentWritesEmptyFile(t *testing.T) {
403+
st := newFakeVaultStore()
404+
bus := &fakeEventBus{store: st}
405+
ws := t.TempDir()
406+
h := &VaultHandler{store: st, eventBus: bus, workspace: ws}
407+
408+
req := newJSONRequest(t, http.MethodPost, "/v1/vault/documents", map[string]any{
409+
"path": "placeholder.md",
410+
"title": "Placeholder",
411+
"content": "",
412+
})
413+
rr := httptest.NewRecorder()
414+
415+
h.handleCreateDocument(rr, req)
416+
417+
if rr.Code != http.StatusCreated {
418+
t.Fatalf("status = %d, want 201 (body: %s)", rr.Code, rr.Body.String())
419+
}
420+
fi, err := os.Stat(filepath.Join(ws, "placeholder.md"))
421+
if err != nil {
422+
t.Fatalf("stat empty file: %v", err)
423+
}
424+
if fi.Size() != 0 {
425+
t.Errorf("size = %d, want 0", fi.Size())
426+
}
427+
if len(bus.published) != 1 {
428+
t.Errorf("published %d events, want 1", len(bus.published))
429+
}
430+
}
431+
432+
// B1 regression: non-master tenants resolve to workspace/tenants/{slug}/ which
433+
// is created on demand. The very first JSON content write into a fresh tenant
434+
// must NOT 500 because EvalSymlinks can't find that dir — writeDocumentContent
435+
// is responsible for creating it (mirrors what handleUpload does).
436+
func TestHandleCreateDocument_NonMasterTenantFirstContentWrite(t *testing.T) {
437+
st := newFakeVaultStore()
438+
bus := &fakeEventBus{store: st}
439+
ws := t.TempDir()
440+
h := &VaultHandler{store: st, eventBus: bus, workspace: ws}
441+
442+
nonMaster := uuid.MustParse("01999999-1111-7000-8000-000000000abc")
443+
ctx := store.WithTenantSlug(store.WithTenantID(context.Background(), nonMaster), "acme")
444+
445+
req := httptest.NewRequest(http.MethodPost, "/v1/vault/documents", bytes.NewReader(mustJSON(t, map[string]any{
446+
"path": "notes/first.md",
447+
"title": "First",
448+
"content": "hello from acme",
449+
}))).WithContext(ctx)
450+
req.Header.Set("Content-Type", "application/json")
451+
rr := httptest.NewRecorder()
452+
453+
h.handleCreateDocument(rr, req)
454+
455+
if rr.Code != http.StatusCreated {
456+
t.Fatalf("status = %d, want 201 (body: %s)", rr.Code, rr.Body.String())
457+
}
458+
tenantDir := filepath.Join(ws, "tenants", "acme")
459+
got, err := os.ReadFile(filepath.Join(tenantDir, "notes", "first.md"))
460+
if err != nil {
461+
t.Fatalf("read non-master tenant doc: %v", err)
462+
}
463+
if string(got) != "hello from acme" {
464+
t.Errorf("content = %q", got)
465+
}
466+
if len(bus.published) != 1 {
467+
t.Errorf("published %d events, want 1", len(bus.published))
468+
}
469+
}
470+
471+
func mustJSON(t *testing.T, v any) []byte {
472+
t.Helper()
473+
b, err := json.Marshal(v)
474+
if err != nil {
475+
t.Fatalf("marshal: %v", err)
476+
}
477+
return b
478+
}

0 commit comments

Comments
 (0)