feat: implement filesystem path validation in policy engine (#4)

yanurag-dev · web-flow · commit 207e5da185e6 · 2026-05-10T00:22:10.000+05:30
* feat: implement filesystem path validation in policy engine

* security: hardening filesystem policy with symlink resolution and precise segment matching
diff --git a/ROADMAP.md b/ROADMAP.md
@@ -0,0 +1,71 @@
+# Sandforge Implementation Roadmap
+
+This roadmap tracks the progress of the Sandforge Agent Sandbox based on [ARCHITECTURE.md](ARCHITECTURE.md).
+
+## Phase 1: Foundation & Policy (Security First)
+*Goal: Establish the core interfaces and the "Deny by Default" security layer.*
+
+- [x] **1.1 Project Scaffolding**: Go workspace, directory structure, and `go.mod`.
+- [x] **1.2 Core API Contracts**: Define `SandboxSpec`, `ExecRequest`, and `SandboxBackend` interfaces.
+- [ ] **1.3 Policy Engine**:
+    - [x] Filesystem path validation (whitelist logic) [#1](https://github.com/yanurag-dev/sandforge/issues/1).
+    - [ ] Network mode enforcement (Offline/Fetch/Full) [#2](https://github.com/yanurag-dev/sandforge/issues/2).
+    - [ ] Resource limit validation (CPU/Memory/Disk) [#2](https://github.com/yanurag-dev/sandforge/issues/2).
+    - [ ] Command family filtering [#3](https://github.com/yanurag-dev/sandforge/issues/3).
+- [ ] **1.4 Testing**: Unit tests for policy enforcement.
+
+## Phase 2: Orchestration & Mocking
+*Goal: Build the state machine that manages sandbox lifecycles.*
+
+- [ ] **2.1 Sandbox Supervisor**:
+    - [ ] Implementation of the Lifecycle State Machine (Requested -> Provisioning -> Ready -> ...).
+    - [ ] Concurrent session management.
+- [ ] **2.2 Mock Backend Driver**:
+    - [ ] An in-memory/process-based driver for testing the supervisor without a VM.
+- [ ] **2.3 Artifact Manager**: Basic logic to handle "CopyOut" for logs and files.
+
+## Phase 3: macOS Execution Plane (macos-vz)
+*Goal: Boot a real Linux VM on macOS using the Apple Virtualization Framework.*
+
+- [ ] **3.1 Worker Image Preparation**: Create a minimal Linux kernel + initrd/disk image.
+- [ ] **3.2 VZ Driver Implementation**:
+    - [ ] VM configuration (vCPU, Memory).
+    - [ ] Virtio-fs or Virtio-9p for workspace mounting.
+    - [ ] Virtio-serial or VSOCK for command transport.
+- [ ] **3.3 Networking**: Implement `offline` and `fetch` (NAT) modes using VZ.
+
+## Phase 4: Linux Execution Plane (linux-kvm)
+*Goal: Parity for Linux hosts.*
+
+- [ ] **4.1 KVM/QEMU Driver**:
+    - [ ] Implementation of the `SandboxBackend` using KVM.
+    - [ ] Shared filesystem setup (Virtio-fs).
+- [ ] **4.2 (Optional) Firecracker**: MicroVM support for ultra-fast boot.
+
+## Phase 5: Task Runtime (Inside the Worker)
+*Goal: The boundary between the VM and the Agent's code.*
+
+- [ ] **5.1 Rootless Container Setup**: Pre-installing and configuring a container runtime (e.g., Podman/Docker) in the worker image.
+- [ ] **5.2 Task Runner Agent**: A small Go binary inside the VM that receives commands via VSOCK and runs them in a container.
+- [ ] **5.3 Cleanup Logic**: Ensuring the task container is destroyed immediately after execution.
+
+## Phase 6: Control Plane & Adapters
+*Goal: The external interface for Coding Agents.*
+
+- [ ] **6.1 Control Plane API**: REST/gRPC server to manage tasks and sessions.
+- [ ] **6.2 Agent Adapters**:
+    - [ ] Generic Tool-Calling Adapter.
+    - [ ] (Optional) Specific adapters for Claude/Codex.
+- [ ] **6.3 Secret Manager**: Injection of scoped secrets into the task environment.
+
+## Phase 7: CLI & Experience
+*Goal: Making it usable.*
+
+- [ ] **7.1 Sandforge CLI**: Commands like `sandforge run --dir . "npm test"`.
+- [ ] **7.2 Logging & Streaming**: Real-time stdout/stderr streaming from the sandbox to the terminal.
+- [ ] **7.3 Audit Logs**: Persisting execution history for review.
+
+---
+## Progress Legend
+- [ ] To Do
+- [x] Done
diff --git a/internal/policy/engine.go b/internal/policy/engine.go
@@ -0,0 +1,61 @@
+package policy
+
+import (
+	"errors"
+	"path/filepath"
+	"strings"
+
+	"github.com/sandforge/sandforge/pkg/api"
+)
+
+// Sentinel errors returned by Engine.EvaluateMount.
+//
+// ErrForbiddenHostPath is returned when the resolved host path is not covered
+// by any allowed prefix, or when one of its segments equals a blocked pattern.
+//
+// ErrPathNotAbs is returned when the cleaned host path is not absolute.
+var (
+	ErrForbiddenHostPath = errors.New("requested host path is forbidden by policy")
+	ErrPathNotAbs        = errors.New("host path must be an absolute path")
+)
+
+// Engine holds the host-filesystem policy consulted by EvaluateMount.
+type Engine struct {
+	// AllowedHostPrefixes lists host paths that mounts may equal or live
+	// beneath. Each entry is cleaned and symlink-resolved on every
+	// evaluation; entries that fail to resolve are skipped.
+	AllowedHostPrefixes []string
+	// BlockedHostPatterns lists names compared for exact equality against
+	// each separator-delimited segment of the resolved host path. They are
+	// not treated as globs or substrings.
+	BlockedHostPatterns []string
+}
+
+// EvaluateMount reports whether mount.HostPath may be exposed to a sandbox.
+//
+// The host path is cleaned, required to be absolute, and resolved through
+// filepath.EvalSymlinks, so it must exist on disk. The resolved path must
+// then equal, or sit beneath, one of AllowedHostPrefixes (each resolved the
+// same way), and none of its segments may equal an entry of
+// BlockedHostPatterns.
+//
+// It returns ErrPathNotAbs for a non-absolute path, ErrForbiddenHostPath when
+// the allowlist or the blocklist rejects the path, and the error from
+// filepath.EvalSymlinks unchanged when the host path cannot be resolved.
+func (e *Engine) EvaluateMount(mount api.WorkspaceMount) error {
+	path := filepath.Clean(mount.HostPath)
+	if !filepath.IsAbs(path) {
+		return ErrPathNotAbs
+	}
+
+	// Resolve symlinks to prevent bypasses (e.g., a symlink pointing to /etc).
+	resolved, err := filepath.EvalSymlinks(path)
+	if err != nil {
+		return err // Path must exist to be validated
+	}
+	path = resolved
+
+	sep := string(filepath.Separator)
+
+	allowed := false
+	for _, prefix := range e.AllowedHostPrefixes {
+		p, err := filepath.EvalSymlinks(filepath.Clean(prefix))
+		if err != nil {
+			continue // Skip prefixes that do not resolve
+		}
+		// A cleaned path ends in a separator only when it is a root
+		// directory. Appending another separator there would yield "//"
+		// and reject every path beneath an allowed root, so add it only
+		// when it is missing.
+		base := p
+		if !strings.HasSuffix(base, sep) {
+			base += sep
+		}
+		if path == p || strings.HasPrefix(path, base) {
+			allowed = true
+			break
+		}
+	}
+
+	if !allowed {
+		return ErrForbiddenHostPath
+	}
+
+	// Compare whole segments rather than substrings so that a name such as
+	// "my-ssh-notes.txt" is not caught by a ".ssh" pattern.
+	// NOTE(review): splitting a Unix-style absolute path yields a leading
+	// empty segment, so an empty pattern would block every path — confirm
+	// whether empty patterns should be rejected at configuration time.
+	segments := strings.Split(path, sep)
+	for _, pattern := range e.BlockedHostPatterns {
+		for _, segment := range segments {
+			if segment == pattern {
+				return ErrForbiddenHostPath
+			}
+		}
+	}
+	return nil
+}
diff --git a/internal/policy/engine_test.go b/internal/policy/engine_test.go
@@ -0,0 +1,117 @@
+package policy
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/sandforge/sandforge/pkg/api"
+)
+
+// TestEvaluateMount exercises Engine.EvaluateMount against a real directory
+// tree so that symlink resolution and segment matching run on actual paths.
+// Every fixture is created up front and each creation error is checked, so a
+// broken fixture fails the test instead of silently changing what is tested.
+func TestEvaluateMount(t *testing.T) {
+	// Create a real temp directory for testing symlinks and path resolution.
+	tempBase := t.TempDir()
+
+	workspacesDir := filepath.Join(tempBase, "workspaces")
+	if err := os.MkdirAll(workspacesDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// An ordinary task directory inside the allowed base.
+	taskDir := filepath.Join(workspacesDir, "task-1")
+	if err := os.MkdirAll(taskDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a "forbidden" directory outside the allowed base.
+	forbiddenDir := filepath.Join(tempBase, "forbidden")
+	if err := os.MkdirAll(forbiddenDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a symlink that tries to "escape" the allowed base.
+	escapeSymlink := filepath.Join(workspacesDir, "escape-link")
+	if err := os.Symlink(forbiddenDir, escapeSymlink); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a path that has a blocked pattern as a substring but not a segment.
+	falsePositivePath := filepath.Join(workspacesDir, "my-ssh-notes.txt")
+	if err := os.WriteFile(falsePositivePath, []byte("test"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create a real blocked segment.
+	blockedSegmentDir := filepath.Join(workspacesDir, ".ssh")
+	if err := os.MkdirAll(blockedSegmentDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	engine := &Engine{
+		AllowedHostPrefixes: []string{
+			workspacesDir,
+		},
+		BlockedHostPatterns: []string{
+			".ssh",
+			"forbidden",
+		},
+	}
+
+	tests := []struct {
+		name      string
+		hostPath  string
+		wantError bool
+	}{
+		{
+			name:      "Valid path in workspace",
+			hostPath:  taskDir,
+			wantError: false,
+		},
+		{
+			name:      "Exact match of allowed prefix",
+			hostPath:  workspacesDir,
+			wantError: false,
+		},
+		{
+			name:      "Path outside whitelist",
+			hostPath:  tempBase, // The parent dir is not whitelisted
+			wantError: true,
+		},
+		{
+			name:      "Relative path rejected",
+			hostPath:  "relative/path",
+			wantError: true,
+		},
+		{
+			name:      "Symlink escape rejected",
+			hostPath:  escapeSymlink,
+			wantError: true,
+		},
+		{
+			name:      "False positive (substring .ssh) now ALLOWED",
+			hostPath:  falsePositivePath,
+			wantError: false,
+		},
+		{
+			name:      "Real blocked segment (.ssh) rejected",
+			hostPath:  blockedSegmentDir,
+			wantError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mount := api.WorkspaceMount{
+				HostPath: tt.hostPath,
+			}
+			err := engine.EvaluateMount(mount)
+			if (err != nil) != tt.wantError {
+				t.Errorf("EvaluateMount() error = %v, wantError %v", err, tt.wantError)
+			}
+		})
+	}
+}