|
8 | 8 | "github.com/onkernel/hypeman/lib/logger" |
9 | 9 | "github.com/onkernel/hypeman/lib/network" |
10 | 10 | "go.opentelemetry.io/otel/trace" |
| 11 | + "gvisor.dev/gvisor/pkg/cleanup" |
11 | 12 | ) |
12 | 13 |
|
13 | 14 | // startInstance starts a stopped instance |
@@ -52,46 +53,53 @@ func (m *manager) startInstance( |
52 | 53 | return nil, fmt.Errorf("get image: %w", err) |
53 | 54 | } |
54 | 55 |
|
55 | | - // 4. Recreate network allocation if network enabled |
| 56 | + // Setup cleanup stack for automatic rollback on errors |
| 57 | + cu := cleanup.Make(func() {}) |
| 58 | + defer cu.Clean() |
| 59 | + |
| 60 | + // 4. Allocate fresh network if network enabled |
56 | 61 | var netConfig *network.NetworkConfig |
57 | 62 | if stored.NetworkEnabled { |
58 | | - log.DebugContext(ctx, "recreating network for start", "instance_id", id, "network", "default") |
59 | | - if err := m.networkManager.RecreateAllocation(ctx, id); err != nil { |
60 | | - log.ErrorContext(ctx, "failed to recreate network", "instance_id", id, "error", err) |
61 | | - return nil, fmt.Errorf("recreate network: %w", err) |
62 | | - } |
63 | | - // Get the network config for VM configuration |
64 | | - netAlloc, err := m.networkManager.GetAllocation(ctx, id) |
| 63 | + log.DebugContext(ctx, "allocating network for start", "instance_id", id, "network", "default") |
| 64 | + netConfig, err = m.networkManager.CreateAllocation(ctx, network.AllocateRequest{ |
| 65 | + InstanceID: id, |
| 66 | + InstanceName: stored.Name, |
| 67 | + }) |
65 | 68 | if err != nil { |
66 | | - log.ErrorContext(ctx, "failed to get network allocation", "instance_id", id, "error", err) |
67 | | - // Cleanup network on failure |
68 | | - if netAlloc != nil { |
69 | | - m.networkManager.ReleaseAllocation(ctx, netAlloc) |
70 | | - } |
71 | | - return nil, fmt.Errorf("get network allocation: %w", err) |
72 | | - } |
73 | | - netConfig = &network.NetworkConfig{ |
74 | | - TAPDevice: netAlloc.TAPDevice, |
75 | | - IP: netAlloc.IP, |
76 | | - MAC: netAlloc.MAC, |
77 | | - Netmask: "255.255.255.0", // Default netmask |
| 69 | + log.ErrorContext(ctx, "failed to allocate network", "instance_id", id, "error", err) |
| 70 | + return nil, fmt.Errorf("allocate network: %w", err) |
78 | 71 | } |
| 72 | + // Update stored metadata with new IP/MAC |
| 73 | + stored.IP = netConfig.IP |
| 74 | + stored.MAC = netConfig.MAC |
| 75 | + // Add network cleanup to stack |
| 76 | + cu.Add(func() { |
| 77 | + m.networkManager.ReleaseAllocation(ctx, &network.Allocation{ |
| 78 | + InstanceID: id, |
| 79 | + TAPDevice: netConfig.TAPDevice, |
| 80 | + }) |
| 81 | + }) |
79 | 82 | } |
80 | 83 |
|
81 | | - // 5. Start VMM and boot VM (reuses logic from create) |
| 84 | + // 5. Regenerate config disk with new network configuration |
| 85 | + instForConfig := &Instance{StoredMetadata: *stored} |
| 86 | + log.DebugContext(ctx, "regenerating config disk", "instance_id", id) |
| 87 | + if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil { |
| 88 | + log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) |
| 89 | + return nil, fmt.Errorf("create config disk: %w", err) |
| 90 | + } |
| 91 | + |
| 92 | + // 6. Start VMM and boot VM (reuses logic from create) |
82 | 93 | log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) |
83 | 94 | if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { |
84 | 95 | log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) |
85 | | - // Cleanup network on failure |
86 | | - if stored.NetworkEnabled { |
87 | | - if netAlloc, err := m.networkManager.GetAllocation(ctx, id); err == nil { |
88 | | - m.networkManager.ReleaseAllocation(ctx, netAlloc) |
89 | | - } |
90 | | - } |
91 | 96 | return nil, err |
92 | 97 | } |
93 | 98 |
|
94 | | - // 6. Update metadata (set PID, StartedAt) |
| 99 | + // Success - release cleanup stack (prevent cleanup) |
| 100 | + cu.Release() |
| 101 | + |
| 102 | + // 7. Update metadata (set PID, StartedAt) |
95 | 103 | now := time.Now() |
96 | 104 | stored.StartedAt = &now |
97 | 105 |
|
|
0 commit comments