Skip to content

Commit 149c5e5

Browse files
authored
feat: implement lifecycle supervisor and mock backend driver (#10)
* feat: implement Sandbox Supervisor and Mock Backend - Add Supervisor state machine and lifecycle management (Start, RunCommand, Stop). - Implement MockBackend for testing the orchestration layer without a real VM. - Add comprehensive unit tests covering state transitions, concurrency safety (sync.RWMutex), and policy enforcement. Resolves: #9 * refactor: address PR feedback on synchronization and robustness - Add per-instance RWMutex to SandboxInstance to prevent data races. - Implement atomic state transitions in Supervisor. - Update NewSupervisor to validate inputs and return errors. - Improve MockBackend with unique handle generation and existence checks. - Add concurrent stress tests to supervisor_test.go.
1 parent bda95f2 commit 149c5e5

3 files changed

Lines changed: 411 additions & 0 deletions

File tree

internal/backend/mock.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package backend
2+
3+
import (
4+
"fmt"
5+
"sync"
6+
7+
"github.com/sandforge/sandforge/pkg/api"
8+
)
9+
10+
type MockBackend struct {
11+
mu sync.RWMutex
12+
sandboxes map[string]api.SandboxSpec
13+
nextID int
14+
}
15+
16+
func NewMockBackend() *MockBackend {
17+
return &MockBackend{
18+
sandboxes: make(map[string]api.SandboxSpec),
19+
nextID: 1,
20+
}
21+
}
22+
23+
func (m *MockBackend) CreateSandbox(spec api.SandboxSpec) (string, error) {
24+
m.mu.Lock()
25+
defer m.mu.Unlock()
26+
handle := fmt.Sprintf("mock-%d", m.nextID)
27+
m.nextID++
28+
m.sandboxes[handle] = spec
29+
return handle, nil
30+
}
31+
32+
func (m *MockBackend) MountWorkspace(handle string, mount api.WorkspaceMount) error {
33+
m.mu.RLock()
34+
defer m.mu.RUnlock()
35+
if _, exists := m.sandboxes[handle]; !exists {
36+
return fmt.Errorf("sandbox handle not found: %s", handle)
37+
}
38+
return nil
39+
}
40+
41+
func (m *MockBackend) Exec(handle string, req api.ExecRequest) (api.ExecResult, error) {
42+
m.mu.RLock()
43+
defer m.mu.RUnlock()
44+
if _, exists := m.sandboxes[handle]; !exists {
45+
return api.ExecResult{}, fmt.Errorf("sandbox handle not found: %s", handle)
46+
}
47+
48+
return api.ExecResult{
49+
ExitCode: 0,
50+
Stdout: fmt.Sprintf("mock output for %v", req.Command),
51+
}, nil
52+
}
53+
54+
func (m *MockBackend) CopyOut(handle string, path string, dest string) error {
55+
m.mu.RLock()
56+
defer m.mu.RUnlock()
57+
if _, exists := m.sandboxes[handle]; !exists {
58+
return fmt.Errorf("sandbox handle not found: %s", handle)
59+
}
60+
return nil
61+
}
62+
63+
func (m *MockBackend) DestroySandbox(handle string) error {
64+
m.mu.Lock()
65+
defer m.mu.Unlock()
66+
if _, exists := m.sandboxes[handle]; !exists {
67+
return fmt.Errorf("sandbox handle not found: %s", handle)
68+
}
69+
delete(m.sandboxes, handle)
70+
return nil
71+
}

internal/supervisor/supervisor.go

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
package supervisor
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"sync"
7+
8+
"github.com/sandforge/sandforge/internal/policy"
9+
"github.com/sandforge/sandforge/pkg/api"
10+
)
11+
12+
// State names the lifecycle phase a sandbox is currently in.
type State string

// Lifecycle phases recorded on a SandboxInstance.
const (
	// StateRequested is the phase an instance is registered in by Start.
	StateRequested = State("requested")
	// StateProvisioning is set by Start just before the backend is asked to
	// create the sandbox.
	StateProvisioning = State("provisioning")
	// StateReady is set once the backend has created the sandbox; RunCommand
	// only accepts instances in this phase.
	StateReady = State("ready")
	// StateExecuting is set by RunCommand while a backend Exec is in flight.
	StateExecuting = State("executing")
	// StateCopyingArtifacts is declared for artifact extraction; no code in
	// this file sets it yet.
	StateCopyingArtifacts = State("copying_artifacts")
	// StateDestroying is set by Stop before the backend is asked to destroy
	// the sandbox.
	StateDestroying = State("destroying")
	// StateDestroyed is set by Stop after the backend destroy succeeded.
	StateDestroyed = State("destroyed")
	// StateError is set when a backend call fails.
	StateError = State("error")
)
25+
26+
// SandboxInstance tracks the runtime state of a single sandbox.
27+
type SandboxInstance struct {
28+
mu sync.RWMutex
29+
ID string
30+
Spec api.SandboxSpec
31+
State State
32+
Handle string // The backend-specific identifier
33+
Error error
34+
}
35+
36+
func (i *SandboxInstance) SetState(s State) {
37+
i.mu.Lock()
38+
defer i.mu.Unlock()
39+
i.State = s
40+
}
41+
42+
func (i *SandboxInstance) GetState() State {
43+
i.mu.RLock()
44+
defer i.mu.RUnlock()
45+
return i.State
46+
}
47+
48+
func (i *SandboxInstance) SetHandle(h string) {
49+
i.mu.Lock()
50+
defer i.mu.Unlock()
51+
i.Handle = h
52+
}
53+
54+
func (i *SandboxInstance) GetHandle() string {
55+
i.mu.RLock()
56+
defer i.mu.RUnlock()
57+
return i.Handle
58+
}
59+
60+
func (i *SandboxInstance) SetError(err error) {
61+
i.mu.Lock()
62+
defer i.mu.Unlock()
63+
i.Error = err
64+
}
65+
66+
// Supervisor orchestrates sandbox lifecycles and enforces policy.
67+
type Supervisor struct {
68+
mu sync.RWMutex
69+
instances map[string]*SandboxInstance
70+
71+
backend api.SandboxBackend
72+
policy *policy.Engine
73+
}
74+
75+
func NewSupervisor(backend api.SandboxBackend, engine *policy.Engine) (*Supervisor, error) {
76+
if backend == nil {
77+
return nil, fmt.Errorf("NewSupervisor: backend is nil")
78+
}
79+
if engine == nil {
80+
return nil, fmt.Errorf("NewSupervisor: policy engine is nil")
81+
}
82+
return &Supervisor{
83+
instances: make(map[string]*SandboxInstance),
84+
backend: backend,
85+
policy: engine,
86+
}, nil
87+
}
88+
89+
// Start will be your entry point to create and boot a sandbox.
90+
func (s *Supervisor) Start(id string, spec api.SandboxSpec) error {
91+
// 1. Evaluate policy
92+
if err := s.policy.EvaluateSandbox(spec); err != nil {
93+
return err
94+
}
95+
96+
// 2. Register instance in 'requested' state
97+
s.mu.Lock()
98+
if _, exists := s.instances[id]; exists {
99+
s.mu.Unlock()
100+
return errors.New("sandbox ID already exists")
101+
}
102+
103+
instance := &SandboxInstance{
104+
ID: id,
105+
Spec: spec,
106+
State: StateRequested,
107+
}
108+
s.instances[id] = instance
109+
s.mu.Unlock()
110+
111+
// 3. Move to 'provisioning' and call backend.CreateSandbox
112+
instance.SetState(StateProvisioning)
113+
handle, err := s.backend.CreateSandbox(spec)
114+
if err != nil {
115+
instance.SetState(StateError)
116+
instance.SetError(err)
117+
return err
118+
}
119+
120+
// 4. Update state to 'ready'
121+
instance.SetHandle(handle)
122+
instance.SetState(StateReady)
123+
124+
return nil
125+
}
126+
127+
// RunCommand will be used to execute something in a ready sandbox.
128+
func (s *Supervisor) RunCommand(id string, req api.ExecRequest) (api.ExecResult, error) {
129+
// 1. Find the instance
130+
s.mu.RLock()
131+
instance, exists := s.instances[id]
132+
s.mu.RUnlock()
133+
134+
if !exists {
135+
return api.ExecResult{}, errors.New("sandbox not found")
136+
}
137+
138+
// 2. Validate state and policy
139+
// We lock the instance to check state and transition atomically
140+
instance.mu.Lock()
141+
if instance.State != StateReady {
142+
instance.mu.Unlock()
143+
return api.ExecResult{}, errors.New("sandbox is not in 'ready' state")
144+
}
145+
146+
if err := s.policy.EvaluateExec(req); err != nil {
147+
instance.mu.Unlock()
148+
return api.ExecResult{}, err
149+
}
150+
151+
// 3. Move state to 'executing'
152+
instance.State = StateExecuting
153+
handle := instance.Handle
154+
instance.mu.Unlock()
155+
156+
// Ensure we go back to 'ready' unless a fatal error occurred
157+
defer func() {
158+
instance.mu.Lock()
159+
if instance.State == StateExecuting {
160+
instance.State = StateReady
161+
}
162+
instance.mu.Unlock()
163+
}()
164+
165+
// 4. Call backend
166+
result, err := s.backend.Exec(handle, req)
167+
if err != nil {
168+
instance.mu.Lock()
169+
instance.State = StateError
170+
instance.Error = err
171+
instance.mu.Unlock()
172+
return result, err
173+
}
174+
175+
return result, nil
176+
}
177+
178+
// Stop will clean up the sandbox.
179+
func (s *Supervisor) Stop(id string) error {
180+
// 1. Find the instance
181+
s.mu.RLock()
182+
instance, exists := s.instances[id]
183+
s.mu.RUnlock()
184+
185+
if !exists {
186+
return errors.New("sandbox not found")
187+
}
188+
189+
// 2. Move state to 'destroying'
190+
instance.mu.Lock()
191+
handle := instance.Handle
192+
instance.State = StateDestroying
193+
instance.mu.Unlock()
194+
195+
// 3. Call backend.DestroySandbox (without holding the lock)
196+
if err := s.backend.DestroySandbox(handle); err != nil {
197+
instance.mu.Lock()
198+
instance.State = StateError
199+
instance.Error = err
200+
instance.mu.Unlock()
201+
return err
202+
}
203+
204+
// 4. Mark destroyed and remove from map
205+
s.mu.Lock()
206+
delete(s.instances, id)
207+
s.mu.Unlock()
208+
209+
instance.SetState(StateDestroyed)
210+
211+
return nil
212+
}

0 commit comments

Comments
 (0)