11package http
22
33import (
4+ "errors"
45 "log/slog"
56 "net/http"
7+ "os"
8+ "path/filepath"
69 "strings"
10+ "time"
711
12+ "github.com/google/uuid"
13+ "github.com/nextlevelbuilder/goclaw/internal/eventbus"
814 "github.com/nextlevelbuilder/goclaw/internal/store"
15+ "github.com/nextlevelbuilder/goclaw/internal/vault"
916)
1017
18+ // writeDocumentContent writes the given content bytes to a tenant-workspace-
19+ // relative path, after validating the path stays inside the workspace and uses
20+ // an allowed extension. Returns the SHA-256 content hash on success.
21+ //
22+ // This is the shared writer used by handleCreateDocument and handleUpdateDocument
23+ // to materialise inline JSON `content` payloads on disk, so the enrichment
24+ // pipeline (which reads files by path) can later compute summaries, embeddings
25+ // and links.
26+ func (h * VaultHandler ) writeDocumentContent (workspace , relPath string , content []byte ) (string , error ) {
27+ // Ensure the tenant workspace exists before resolving symlinks. Non-master
28+ // tenants live under workspace/tenants/{slug}/ which handleUpload creates on
29+ // demand (see vault_handler_upload.go) — without this, the very first JSON
30+ // content write into a fresh tenant would fail EvalSymlinks with ErrNotExist
31+ // and 500 the request.
32+ if err := os .MkdirAll (workspace , 0o755 ); err != nil {
33+ return "" , err
34+ }
35+
36+ // Resolve the workspace root through any symlinks so all containment checks
37+ // compare canonical paths.
38+ realWS , err := filepath .EvalSymlinks (workspace )
39+ if err != nil {
40+ return "" , err
41+ }
42+ target := filepath .Join (realWS , filepath .Clean (relPath ))
43+
44+ // Lexical containment: blocks "../" and absolute-path escapes.
45+ if target != realWS && ! strings .HasPrefix (target , realWS + string (os .PathSeparator )) {
46+ slog .Warn ("security.vault_symlink_escape" ,
47+ "site" , "path_traversal" , "attempted" , target , "workspace" , realWS )
48+ return "" , os .ErrInvalid
49+ }
50+
51+ // Symlink containment: a lexical prefix check is not enough — a symlink that
52+ // lives *inside* the workspace but points outside it (e.g. workspace/link ->
53+ // /etc) would otherwise let the write follow it and clobber a file beyond
54+ // the workspace. Resolve the deepest already-existing ancestor of the target
55+ // and confirm it still canonicalises to a path inside the workspace, before
56+ // any directory is created or byte written.
57+ for ancestor := filepath .Dir (target ); ; {
58+ resolved , rerr := filepath .EvalSymlinks (ancestor )
59+ if rerr == nil {
60+ if resolved != realWS && ! strings .HasPrefix (resolved , realWS + string (os .PathSeparator )) {
61+ slog .Warn ("security.vault_symlink_escape" ,
62+ "site" , "ancestor" , "resolved" , resolved , "workspace" , realWS )
63+ return "" , os .ErrInvalid
64+ }
65+ break
66+ }
67+ if ! os .IsNotExist (rerr ) {
68+ return "" , rerr
69+ }
70+ parent := filepath .Dir (ancestor )
71+ if parent == ancestor {
72+ break // walked to the filesystem root without an existing ancestor
73+ }
74+ ancestor = parent
75+ }
76+
77+ if err := os .MkdirAll (filepath .Dir (target ), 0o755 ); err != nil {
78+ return "" , err
79+ }
80+
81+ // Fast-fail when the final component is already a symlink — gives a clean
82+ // rejection + security log without parsing OS-specific error codes. The
83+ // O_NOFOLLOW open below closes the TOCTOU window this Lstat leaves open
84+ // (an attacker could swap the file for a symlink between calls).
85+ if fi , lerr := os .Lstat (target ); lerr == nil && fi .Mode ()& os .ModeSymlink != 0 {
86+ slog .Warn ("security.vault_symlink_escape" ,
87+ "site" , "final_lstat" , "target" , target , "workspace" , realWS )
88+ return "" , os .ErrInvalid
89+ }
90+
91+ // O_NOFOLLOW makes the kernel refuse to follow a symlink at the final
92+ // component atomically with the open, removing the Lstat→Open race entirely
93+ // on Unix. On Windows oNoFollow == 0 and the Lstat above is best-effort.
94+ f , err := os .OpenFile (target , os .O_WRONLY | os .O_CREATE | os .O_TRUNC | oNoFollow , 0o644 )
95+ if err != nil {
96+ if isSymlinkLoopErr (err ) {
97+ slog .Warn ("security.vault_symlink_escape" ,
98+ "site" , "open_nofollow" , "target" , target , "workspace" , realWS )
99+ return "" , os .ErrInvalid
100+ }
101+ return "" , err
102+ }
103+ if _ , werr := f .Write (content ); werr != nil {
104+ _ = f .Close () // best-effort cleanup; the write error is the real one
105+ return "" , werr
106+ }
107+ // Explicit Close so a delayed FS error (e.g. disk-full at flush) surfaces
108+ // instead of being silently swallowed by a deferred close.
109+ if cerr := f .Close (); cerr != nil {
110+ return "" , cerr
111+ }
112+ return vault .ContentHash (content ), nil
113+ }
114+
115+ // publishDocUpserted enqueues the standard EventVaultDocUpserted so the
116+ // enrichment worker re-runs summary/embedding/link extraction for this doc.
117+ // No-op when the event bus is not wired (some test setups).
118+ func (h * VaultHandler ) publishDocUpserted (tenantID uuid.UUID , agentID , docID , relPath , hash , workspace string ) {
119+ if h .eventBus == nil {
120+ return
121+ }
122+ tenantIDStr := tenantID .String ()
123+ // Start enrichment progress before publishing so the worker pool can't drain
124+ // the event and finish before the progress tracker registers it — same
125+ // ordering handleUpload relies on to avoid that race.
126+ if h .enrichProgress != nil {
127+ h .enrichProgress .Start (1 , tenantID )
128+ }
129+ h .eventBus .Publish (eventbus.DomainEvent {
130+ ID : uuid .Must (uuid .NewV7 ()).String (),
131+ Type : eventbus .EventVaultDocUpserted ,
132+ SourceID : docID + ":" + hash ,
133+ TenantID : tenantIDStr ,
134+ AgentID : agentID ,
135+ Timestamp : time .Now (),
136+ Payload : eventbus.VaultDocUpsertedPayload {
137+ DocID : docID ,
138+ TenantID : tenantIDStr ,
139+ AgentID : agentID ,
140+ Path : relPath ,
141+ ContentHash : hash ,
142+ Workspace : workspace ,
143+ },
144+ })
145+ }
146+
11147// handleListAllDocuments lists vault documents across all agents in tenant.
12148// Optional query param agent_id to filter by specific agent.
13149func (h * VaultHandler ) handleListAllDocuments (w http.ResponseWriter , r * http.Request ) {
@@ -99,6 +235,16 @@ func (h *VaultHandler) handleGetDocument(w http.ResponseWriter, r *http.Request)
99235}
100236
101237// handleCreateDocument creates a new vault document.
238+ //
239+ // `content` semantics — symmetric with handleUpdateDocument:
240+ // - field omitted (nil pointer): metadata-only stub, no file write, no event
241+ // (typically followed by a rescan that materialises the file from disk).
242+ // - field present and non-empty: bytes materialised at <tenant-workspace>/<path>,
243+ // SHA-256 hash stored on the row, EventVaultDocUpserted emitted so the
244+ // enrichment worker computes summary/embeddings/links — same code path the
245+ // multipart /v1/vault/upload endpoint and the filesystem rescan use.
246+ // - field present and empty (""): a 0-byte file is written and the event still
247+ // fires. Same behaviour as PUT — pick the right shape on the client side.
102248func (h * VaultHandler ) handleCreateDocument (w http.ResponseWriter , r * http.Request ) {
103249 locale := extractLocale (r )
104250 tenantID := store .TenantIDFromContext (r .Context ())
@@ -107,6 +253,7 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
107253 var body struct {
108254 Path string `json:"path"`
109255 Title string `json:"title"`
256+ Content * string `json:"content"` // nil=no write; ""=write empty file; "data"=write bytes (mirrors PUT)
110257 DocType string `json:"doc_type"`
111258 Scope string `json:"scope"`
112259 TeamID string `json:"team_id"`
@@ -137,6 +284,16 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
137284 writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "invalid scope" })
138285 return
139286 }
287+ // Validate extension upfront if caller wants to write content — matches the
288+ // whitelist enforced by /v1/vault/upload so the two endpoints accept the
289+ // same file types. nil pointer = "no content field", which skips the check.
290+ if body .Content != nil {
291+ ext := strings .ToLower (filepath .Ext (body .Path ))
292+ if ! allowedUploadExts [ext ] {
293+ writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "unsupported file type: " + ext })
294+ return
295+ }
296+ }
140297
141298 doc := & store.VaultDocument {
142299 TenantID : tenantID .String (),
@@ -160,11 +317,50 @@ func (h *VaultHandler) handleCreateDocument(w http.ResponseWriter, r *http.Reque
160317 doc .Scope = "team"
161318 }
162319 }
320+
321+ // Persist file content if provided, before the DB upsert so the
322+ // content_hash column is set in a single write.
323+ var (
324+ wsPath string
325+ writtenHash string
326+ contentWasSet bool
327+ )
328+ if body .Content != nil {
329+ wsPath = h .resolveTenantWorkspace (r .Context ())
330+ if wsPath == "" {
331+ writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "workspace not available" })
332+ return
333+ }
334+ hash , werr := h .writeDocumentContent (wsPath , body .Path , []byte (* body .Content ))
335+ if werr != nil {
336+ slog .Warn ("vault.create: write content failed" , "path" , body .Path , "error" , werr )
337+ if errors .Is (werr , os .ErrInvalid ) {
338+ writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "path escapes workspace" })
339+ return
340+ }
341+ writeJSON (w , http .StatusInternalServerError , map [string ]string {"error" : "failed to write content" })
342+ return
343+ }
344+ doc .ContentHash = hash
345+ writtenHash = hash
346+ contentWasSet = true
347+ }
348+
163349 if err := h .store .UpsertDocument (r .Context (), doc ); err != nil {
164350 slog .Warn ("vault.create failed" , "error" , err )
165351 writeJSON (w , http .StatusInternalServerError , map [string ]string {"error" : err .Error ()})
166352 return
167353 }
354+
355+ // Fire enrichment after the DB row exists so the worker can locate the doc.
356+ if contentWasSet {
357+ evAgent := ""
358+ if doc .AgentID != nil {
359+ evAgent = * doc .AgentID
360+ }
361+ h .publishDocUpserted (tenantID , evAgent , doc .ID , body .Path , writtenHash , wsPath )
362+ }
363+
168364 // Re-fetch by ID (set via RETURNING) — unambiguous even when same path exists across teams.
169365 created , _ := h .store .GetDocumentByID (r .Context (), tenantID .String (), doc .ID )
170366 if created != nil {
@@ -189,6 +385,7 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque
189385
190386 var body struct {
191387 Title * string `json:"title"`
388+ Content * string `json:"content"` // nil=no change; "" clears the file; non-empty rewrites it
192389 DocType * string `json:"doc_type"`
193390 Scope * string `json:"scope"`
194391 TeamID * string `json:"team_id"` // nil=no change, ""=clear, "uuid"=set
@@ -225,11 +422,52 @@ func (h *VaultHandler) handleUpdateDocument(w http.ResponseWriter, r *http.Reque
225422 existing .Metadata = body .Metadata
226423 }
227424
425+ // Rewrite content on disk when caller supplied a `content` field.
426+ var (
427+ wsPath string
428+ writtenHash string
429+ contentWasSet bool
430+ )
431+ if body .Content != nil {
432+ ext := strings .ToLower (filepath .Ext (existing .Path ))
433+ if ! allowedUploadExts [ext ] {
434+ writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "unsupported file type: " + ext })
435+ return
436+ }
437+ wsPath = h .resolveTenantWorkspace (r .Context ())
438+ if wsPath == "" {
439+ writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "workspace not available" })
440+ return
441+ }
442+ hash , werr := h .writeDocumentContent (wsPath , existing .Path , []byte (* body .Content ))
443+ if werr != nil {
444+ slog .Warn ("vault.update: write content failed" , "path" , existing .Path , "error" , werr )
445+ if errors .Is (werr , os .ErrInvalid ) {
446+ writeJSON (w , http .StatusBadRequest , map [string ]string {"error" : "path escapes workspace" })
447+ return
448+ }
449+ writeJSON (w , http .StatusInternalServerError , map [string ]string {"error" : "failed to write content" })
450+ return
451+ }
452+ existing .ContentHash = hash
453+ writtenHash = hash
454+ contentWasSet = true
455+ }
456+
228457 if err := h .store .UpsertDocument (r .Context (), existing ); err != nil {
229458 slog .Warn ("vault.update failed" , "error" , err )
230459 writeJSON (w , http .StatusInternalServerError , map [string ]string {"error" : err .Error ()})
231460 return
232461 }
462+
463+ if contentWasSet {
464+ evAgent := ""
465+ if existing .AgentID != nil {
466+ evAgent = * existing .AgentID
467+ }
468+ h .publishDocUpserted (tenantID , evAgent , existing .ID , existing .Path , writtenHash , wsPath )
469+ }
470+
233471 updated , _ := h .store .GetDocumentByID (r .Context (), tenantID .String (), docID )
234472 if updated != nil {
235473 writeJSON (w , http .StatusOK , updated )
0 commit comments