From 5eed12e450484de55f84a9d0d317bb3c4c40fa25 Mon Sep 17 00:00:00 2001 From: Anurag Yadav Date: Sun, 10 May 2026 16:01:06 +0530 Subject: [PATCH 1/2] feat: implement Sandbox Supervisor and Mock Backend - Add Supervisor state machine and lifecycle management (Start, RunCommand, Stop). - Implement MockBackend for testing the orchestration layer without a real VM. - Add comprehensive unit tests covering state transitions, concurrency safety (sync.RWMutex), and policy enforcement. Resolves: #9 --- internal/backend/mock.go | 49 ++++++++ internal/supervisor/supervisor.go | 155 +++++++++++++++++++++++++ internal/supervisor/supervisor_test.go | 86 ++++++++++++++ 3 files changed, 290 insertions(+) create mode 100644 internal/backend/mock.go create mode 100644 internal/supervisor/supervisor.go create mode 100644 internal/supervisor/supervisor_test.go diff --git a/internal/backend/mock.go b/internal/backend/mock.go new file mode 100644 index 0000000..ab85984 --- /dev/null +++ b/internal/backend/mock.go @@ -0,0 +1,49 @@ +package backend + +import ( + "fmt" + "sync" + + "github.com/sandforge/sandforge/pkg/api" +) + +type MockBackend struct { + mu sync.RWMutex + sandboxes map[string]api.SandboxSpec +} + +func NewMockBackend() *MockBackend { + return &MockBackend{ + sandboxes: make(map[string]api.SandboxSpec), + } +} + +func (m *MockBackend) CreateSandbox(spec api.SandboxSpec) (string, error) { + m.mu.Lock() + defer m.mu.Unlock() + handle := fmt.Sprintf("mock-%d", len(m.sandboxes)) + m.sandboxes[handle] = spec + return handle, nil +} + +func (m *MockBackend) MountWorkspace(handle string, mount api.WorkspaceMount) error { + return nil +} + +func (m *MockBackend) Exec(handle string, req api.ExecRequest) (api.ExecResult, error) { + return api.ExecResult{ + ExitCode: 0, + Stdout: fmt.Sprintf("mock output for %v", req.Command), + }, nil +} + +func (m *MockBackend) CopyOut(handle string, path string, dest string) error { + return nil +} + +func (m *MockBackend) DestroySandbox(handle string) error { + m.mu.Lock() + defer m.mu.Unlock() + delete(m.sandboxes, handle) + return nil +} diff --git a/internal/supervisor/supervisor.go b/internal/supervisor/supervisor.go new file mode 100644 index 0000000..8b55b70 --- /dev/null +++ b/internal/supervisor/supervisor.go @@ -0,0 +1,155 @@ +package supervisor + +import ( + "errors" + "sync" + + "github.com/sandforge/sandforge/internal/policy" + "github.com/sandforge/sandforge/pkg/api" +) + +// State represents the current lifecycle phase of a sandbox. +type State string + +const ( + StateRequested State = "requested" + StateProvisioning State = "provisioning" + StateReady State = "ready" + StateExecuting State = "executing" + StateCopyingArtifacts State = "copying_artifacts" + StateDestroying State = "destroying" + StateDestroyed State = "destroyed" + StateError State = "error" +) + +// SandboxInstance tracks the runtime state of a single sandbox. +type SandboxInstance struct { + ID string + Spec api.SandboxSpec + State State + Handle string // The backend-specific identifier + Error error +} + +// Supervisor orchestrates sandbox lifecycles and enforces policy. +type Supervisor struct { + mu sync.RWMutex + instances map[string]*SandboxInstance + + backend api.SandboxBackend + policy *policy.Engine +} + +func NewSupervisor(backend api.SandboxBackend, engine *policy.Engine) *Supervisor { + return &Supervisor{ + instances: make(map[string]*SandboxInstance), + backend: backend, + policy: engine, + } +} + +// Start will be your entry point to create and boot a sandbox. +func (s *Supervisor) Start(id string, spec api.SandboxSpec) error { + // 1. Evaluate policy + if err := s.policy.EvaluateSandbox(spec); err != nil { + return err + } + + // 2. Register instance in 'requested' state + s.mu.Lock() + if _, exists := s.instances[id]; exists { + s.mu.Unlock() + return errors.New("sandbox ID already exists") + } + + instance := &SandboxInstance{ + ID: id, + Spec: spec, + State: StateRequested, + } + s.instances[id] = instance + s.mu.Unlock() + + // 3. Move to 'provisioning' and call backend.CreateSandbox + instance.State = StateProvisioning + handle, err := s.backend.CreateSandbox(spec) + if err != nil { + instance.State = StateError + instance.Error = err + return err + } + + // 4. Update state to 'ready' + instance.Handle = handle + instance.State = StateReady + + return nil +} + +// RunCommand will be used to execute something in a ready sandbox. +func (s *Supervisor) RunCommand(id string, req api.ExecRequest) (api.ExecResult, error) { + // 1. Find the instance + s.mu.RLock() + instance, exists := s.instances[id] + s.mu.RUnlock() + + if !exists { + return api.ExecResult{}, errors.New("sandbox not found") + } + + // 2. Validate state and policy + if instance.State != StateReady { + return api.ExecResult{}, errors.New("sandbox is not in 'ready' state") + } + if err := s.policy.EvaluateExec(req); err != nil { + return api.ExecResult{}, err + } + + // 3. Update state to 'executing' + instance.State = StateExecuting + + // Ensure we go back to 'ready' unless a fatal error occurred + defer func() { + if instance.State == StateExecuting { + instance.State = StateReady + } + }() + + // 4. Call backend + result, err := s.backend.Exec(instance.Handle, req) + if err != nil { + instance.State = StateError + instance.Error = err + return result, err + } + + return result, nil +} + +// Stop will clean up the sandbox. +func (s *Supervisor) Stop(id string) error { + // 1. Find the instance + s.mu.Lock() // We use a full Lock because we might delete it + defer s.mu.Unlock() + + instance, exists := s.instances[id] + if !exists { + return errors.New("sandbox not found") + } + + // 2. Move state to 'destroying' + instance.State = StateDestroying + + // 3. Call backend.DestroySandbox + if err := s.backend.DestroySandbox(instance.Handle); err != nil { + instance.State = StateError + instance.Error = err + return err + } + + // 4. Mark destroyed and remove from map + instance.State = StateDestroyed + delete(s.instances, id) + + return nil +} diff --git a/internal/supervisor/supervisor_test.go b/internal/supervisor/supervisor_test.go new file mode 100644 index 0000000..ab5e86e --- /dev/null +++ b/internal/supervisor/supervisor_test.go @@ -0,0 +1,86 @@ +package supervisor + +import ( + "testing" + + "github.com/sandforge/sandforge/internal/backend" + "github.com/sandforge/sandforge/internal/policy" + "github.com/sandforge/sandforge/pkg/api" +) + +func TestSupervisorLifecycle(t *testing.T) { + // Setup + mockBackend := backend.NewMockBackend() + engine := &policy.Engine{ + MaxCPU: 4, + MaxMemoryMb: 4096, + MaxDiskGb: 10, + AllowedNetworkModes: []string{"offline"}, + AllowedCommands: []string{"ls", "echo"}, + } + sup := NewSupervisor(mockBackend, engine) + + spec := api.SandboxSpec{ + CPU: 2, + MemoryMb: 1024, + DiskGb: 5, + NetworkMode: "offline", + } + + id := "test-1" + + // 1. Test Start + t.Run("Start", func(t *testing.T) { + err := sup.Start(id, spec) + if err != nil { + t.Fatalf("Failed to start sandbox: %v", err) + } + + instance, exists := sup.instances[id] + if !exists { + t.Fatal("Instance not found in supervisor map") + } + if instance.State != StateReady { + t.Errorf("Expected state Ready, got %v", instance.State) + } + }) + + // 2. Test RunCommand + t.Run("RunCommand", func(t *testing.T) { + req := api.ExecRequest{ + Command: []string{"ls", "-la"}, + } + result, err := sup.RunCommand(id, req) + if err != nil { + t.Fatalf("Failed to run command: %v", err) + } + + if result.ExitCode != 0 { + t.Errorf("Expected exit code 0, got %d", result.ExitCode) + } + }) + + // 3. Test Policy Violation + t.Run("PolicyViolation", func(t *testing.T) { + req := api.ExecRequest{ + Command: []string{"rm", "-rf", "/"}, + } + _, err := sup.RunCommand(id, req) + if err == nil { + t.Error("Expected error for forbidden command, got nil") + } + }) + + // 4. Test Stop + t.Run("Stop", func(t *testing.T) { + err := sup.Stop(id) + if err != nil { + t.Fatalf("Failed to stop sandbox: %v", err) + } + + _, exists := sup.instances[id] + if exists { + t.Error("Instance should have been removed from map after Stop") + } + }) +} From 722b6f10d409080ee102cb7647d74760225885b0 Mon Sep 17 00:00:00 2001 From: Anurag Yadav Date: Sun, 10 May 2026 23:16:35 +0530 Subject: [PATCH 2/2] refactor: address PR feedback on synchronization and robustness - Add per-instance RWMutex to SandboxInstance to prevent data races. - Implement atomic state transitions in Supervisor. - Update NewSupervisor to validate inputs and return errors. - Improve MockBackend with unique handle generation and existence checks. - Add concurrent stress tests to supervisor_test.go. --- internal/backend/mock.go | 24 ++++++- internal/supervisor/supervisor.go | 87 +++++++++++++++++++++----- internal/supervisor/supervisor_test.go | 50 +++++++++++++-- 3 files changed, 141 insertions(+), 20 deletions(-) diff --git a/internal/backend/mock.go b/internal/backend/mock.go index ab85984..eb14653 100644 --- a/internal/backend/mock.go +++ b/internal/backend/mock.go @@ -10,27 +10,41 @@ import ( type MockBackend struct { mu sync.RWMutex sandboxes map[string]api.SandboxSpec + nextID int } func NewMockBackend() *MockBackend { return &MockBackend{ sandboxes: make(map[string]api.SandboxSpec), + nextID: 1, } } func (m *MockBackend) CreateSandbox(spec api.SandboxSpec) (string, error) { m.mu.Lock() defer m.mu.Unlock() - handle := fmt.Sprintf("mock-%d", len(m.sandboxes)) + handle := fmt.Sprintf("mock-%d", m.nextID) + m.nextID++ m.sandboxes[handle] = spec return handle, nil } func (m *MockBackend) MountWorkspace(handle string, mount api.WorkspaceMount) error { + m.mu.RLock() + defer m.mu.RUnlock() + if _, exists := m.sandboxes[handle]; !exists { + return fmt.Errorf("sandbox handle not found: %s", handle) + } return nil } func (m *MockBackend) Exec(handle string, req api.ExecRequest) (api.ExecResult, error) { + m.mu.RLock() + defer m.mu.RUnlock() + if _, exists := m.sandboxes[handle]; !exists { + return api.ExecResult{}, fmt.Errorf("sandbox handle not found: %s", handle) + } + return api.ExecResult{ ExitCode: 0, Stdout: fmt.Sprintf("mock output for %v", req.Command), @@ -38,12 +52,20 @@ func (m *MockBackend) Exec(handle string, req api.ExecRequest) (api.ExecResult, } func (m *MockBackend) CopyOut(handle string, path string, dest string) error { + m.mu.RLock() + defer m.mu.RUnlock() + if _, exists := m.sandboxes[handle]; !exists { + return fmt.Errorf("sandbox handle not found: %s", handle) + } return nil } func (m *MockBackend) DestroySandbox(handle string) error { m.mu.Lock() defer m.mu.Unlock() + if _, exists := m.sandboxes[handle]; !exists { + return fmt.Errorf("sandbox handle not found: %s", handle) + } delete(m.sandboxes, handle) return nil } diff --git a/internal/supervisor/supervisor.go b/internal/supervisor/supervisor.go index 8b55b70..b311555 100644 --- a/internal/supervisor/supervisor.go +++ b/internal/supervisor/supervisor.go @@ -2,6 +2,7 @@ package supervisor import ( "errors" + "fmt" "sync" "github.com/sandforge/sandforge/internal/policy" @@ -24,6 +25,7 @@ const ( // SandboxInstance tracks the runtime state of a single sandbox. type SandboxInstance struct { + mu sync.RWMutex ID string Spec api.SandboxSpec State State @@ -31,6 +33,36 @@ type SandboxInstance struct { Error error } +func (i *SandboxInstance) SetState(s State) { + i.mu.Lock() + defer i.mu.Unlock() + i.State = s +} + +func (i *SandboxInstance) GetState() State { + i.mu.RLock() + defer i.mu.RUnlock() + return i.State +} + +func (i *SandboxInstance) SetHandle(h string) { + i.mu.Lock() + defer i.mu.Unlock() + i.Handle = h +} + +func (i *SandboxInstance) GetHandle() string { + i.mu.RLock() + defer i.mu.RUnlock() + return i.Handle +} + +func (i *SandboxInstance) SetError(err error) { + i.mu.Lock() + defer i.mu.Unlock() + i.Error = err +} + // Supervisor orchestrates sandbox lifecycles and enforces policy. type Supervisor struct { mu sync.RWMutex @@ -40,12 +72,18 @@ type Supervisor struct { policy *policy.Engine } -func NewSupervisor(backend api.SandboxBackend, engine *policy.Engine) *Supervisor { +func NewSupervisor(backend api.SandboxBackend, engine *policy.Engine) (*Supervisor, error) { + if backend == nil { + return nil, fmt.Errorf("NewSupervisor: backend is nil") + } + if engine == nil { + return nil, fmt.Errorf("NewSupervisor: policy engine is nil") + } return &Supervisor{ instances: make(map[string]*SandboxInstance), backend: backend, policy: engine, - } + }, nil } // Start will be your entry point to create and boot a sandbox. @@ -71,17 +109,17 @@ func (s *Supervisor) Start(id string, spec api.SandboxSpec) error { s.mu.Unlock() // 3. Move to 'provisioning' and call backend.CreateSandbox - instance.State = StateProvisioning + instance.SetState(StateProvisioning) handle, err := s.backend.CreateSandbox(spec) if err != nil { - instance.State = StateError - instance.Error = err + instance.SetState(StateError) + instance.SetError(err) return err } // 4. Update state to 'ready' - instance.Handle = handle - instance.State = StateReady + instance.SetHandle(handle) + instance.SetState(StateReady) return nil } @@ -98,28 +136,39 @@ func (s *Supervisor) RunCommand(id string, req api.ExecRequest) (api.ExecResult, } // 2. Validate state and policy + // We lock the instance to check state and transition atomically + instance.mu.Lock() if instance.State != StateReady { + instance.mu.Unlock() return api.ExecResult{}, errors.New("sandbox is not in 'ready' state") } + if err := s.policy.EvaluateExec(req); err != nil { + instance.mu.Unlock() return api.ExecResult{}, err } - // 3. Update state to 'executing' + // 3. Move state to 'executing' instance.State = StateExecuting + handle := instance.Handle + instance.mu.Unlock() // Ensure we go back to 'ready' unless a fatal error occurred defer func() { + instance.mu.Lock() if instance.State == StateExecuting { instance.State = StateReady } + instance.mu.Unlock() }() // 4. Call backend - result, err := s.backend.Exec(instance.Handle, req) + result, err := s.backend.Exec(handle, req) if err != nil { + instance.mu.Lock() instance.State = StateError instance.Error = err + instance.mu.Unlock() return result, err } @@ -129,27 +178,35 @@ func (s *Supervisor) RunCommand(id string, req api.ExecRequest) (api.ExecResult, // Stop will clean up the sandbox. func (s *Supervisor) Stop(id string) error { // 1. Find the instance - s.mu.Lock() // We use a full Lock because we might delete it - defer s.mu.Unlock() - + s.mu.RLock() instance, exists := s.instances[id] + s.mu.RUnlock() + if !exists { return errors.New("sandbox not found") } // 2. Move state to 'destroying' + instance.mu.Lock() + handle := instance.Handle instance.State = StateDestroying + instance.mu.Unlock() - // 3. Call backend.DestroySandbox - if err := s.backend.DestroySandbox(instance.Handle); err != nil { + // 3. Call backend.DestroySandbox (without holding the lock) + if err := s.backend.DestroySandbox(handle); err != nil { + instance.mu.Lock() instance.State = StateError instance.Error = err + instance.mu.Unlock() return err } // 4. Mark destroyed and remove from map - instance.State = StateDestroyed + s.mu.Lock() delete(s.instances, id) + s.mu.Unlock() + + instance.SetState(StateDestroyed) return nil } diff --git a/internal/supervisor/supervisor_test.go b/internal/supervisor/supervisor_test.go index ab5e86e..bcc4aa1 100644 --- a/internal/supervisor/supervisor_test.go +++ b/internal/supervisor/supervisor_test.go @@ -1,6 +1,8 @@ package supervisor import ( + "fmt" + "sync" "testing" "github.com/sandforge/sandforge/internal/backend" @@ -18,7 +20,10 @@ func TestSupervisorLifecycle(t *testing.T) { AllowedNetworkModes: []string{"offline"}, AllowedCommands: []string{"ls", "echo"}, } - sup := NewSupervisor(mockBackend, engine) + sup, err := NewSupervisor(mockBackend, engine) + if err != nil { + t.Fatalf("Failed to create supervisor: %v", err) + } spec := api.SandboxSpec{ CPU: 2, @@ -36,12 +41,15 @@ func TestSupervisorLifecycle(t *testing.T) { t.Fatalf("Failed to start sandbox: %v", err) } + sup.mu.RLock() instance, exists := sup.instances[id] + sup.mu.RUnlock() + if !exists { t.Fatal("Instance not found in supervisor map") } - if instance.State != StateReady { - t.Errorf("Expected state Ready, got %v", instance.State) + if instance.GetState() != StateReady { + t.Errorf("Expected state Ready, got %v", instance.GetState()) } }) @@ -71,14 +79,48 @@ func TestSupervisorLifecycle(t *testing.T) { } }) - // 4. Test Stop + // 4. Test Concurrent Access + t.Run("ConcurrentAccess", func(t *testing.T) { + var wg sync.WaitGroup + numWorkers := 10 + idPrefix := "concurrent-" + + // Start multiple sandboxes + for i := 0; i < numWorkers; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + cid := fmt.Sprintf("%s%d", idPrefix, idx) + if err := sup.Start(cid, spec); err != nil { + t.Errorf("Failed to start concurrent sandbox %d: %v", idx, err) + } + + // Run a command + req := api.ExecRequest{Command: []string{"echo", "hello"}} + if _, err := sup.RunCommand(cid, req); err != nil { + t.Errorf("Failed to run command in concurrent sandbox %d: %v", idx, err) + } + + // Stop + if err := sup.Stop(cid); err != nil { + t.Errorf("Failed to stop concurrent sandbox %d: %v", idx, err) + } + }(i) + } + wg.Wait() + }) + + // 5. Test Stop t.Run("Stop", func(t *testing.T) { err := sup.Stop(id) if err != nil { t.Fatalf("Failed to stop sandbox: %v", err) } + sup.mu.RLock() _, exists := sup.instances[id] + sup.mu.RUnlock() + if exists { t.Error("Instance should have been removed from map after Stop") }