Skip to content

Commit 8ffc63a

Browse files
committed
fix(kiloclaw): reverse region list on capacity recovery instead of deprioritizing
deprioritizeRegion compared the volume's concrete region (e.g. 'ord') against the configured meta-regions ('us', 'eu'), so it never matched and, because the list was only shuffled, the recovery volume could land back in the same exhausted region. Reverse the configured region list instead, so that recovery deterministically tries the regions in the opposite order (with FLY_REGION='us,eu', EU is tried first).
1 parent 8094556 commit 8ffc63a

2 files changed

Lines changed: 11 additions & 13 deletions

File tree

kiloclaw/src/durable-objects/kiloclaw-instance.test.ts

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2128,7 +2128,7 @@ describe('start: volume region validation', () => {
21282128
// ============================================================================
21292129

21302130
describe('start: 412 insufficient resources recovery', () => {
2131-
it('fresh provision (never started): deletes volume and creates fresh with deprioritized regions', async () => {
2131+
it('fresh provision (never started): deletes volume and creates fresh with reversed regions', async () => {
21322132
const { instance, storage } = createInstance();
21332133
await seedProvisioned(storage, { flyMachineId: null, lastStartedAt: null });
21342134

@@ -2150,16 +2150,15 @@ describe('start: 412 insufficient resources recovery', () => {
21502150

21512151
// Old volume was deleted
21522152
expect(flyClient.deleteVolume).toHaveBeenCalledWith(expect.anything(), 'vol-1');
2153-
// New volume created via fallback with deprioritized regions and compute hint
2153+
// New volume created via fallback with reversed regions and compute hint
21542154
const regions412Call = (flyClient.createVolumeWithFallback as Mock).mock.calls[0];
21552155
expect(regions412Call[1]).toEqual(
21562156
expect.objectContaining({
21572157
compute: expect.objectContaining({ cpus: 2, memory_mb: 3072 }) as unknown,
21582158
})
21592159
);
2160-
// Regions are shuffled, so just check the set (deprioritize is a no-op here
2161-
// because 'iad' is not in FLY_REGION='us,eu')
2162-
expect((regions412Call[2] as string[]).sort()).toEqual(['eu', 'us']);
2160+
// Regions are reversed from FLY_REGION='us,eu' so EU is tried first on recovery
2161+
expect(regions412Call[2]).toEqual(['eu', 'us']);
21632162
// source_volume_id should NOT be set for fresh provision
21642163
const createVolumeCall = (flyClient.createVolumeWithFallback as Mock).mock
21652164
.calls[0][1] as Record<string, unknown>;
@@ -2197,16 +2196,16 @@ describe('start: 412 insufficient resources recovery', () => {
21972196

21982197
await instance.start('user-1');
21992198

2200-
// Volume was forked (source_volume_id set) with compute hint and deprioritized regions
2199+
// Volume was forked (source_volume_id set) with compute hint and reversed regions
22012200
const regionsForkCall = (flyClient.createVolumeWithFallback as Mock).mock.calls[0];
22022201
expect(regionsForkCall[1]).toEqual(
22032202
expect.objectContaining({
22042203
source_volume_id: 'vol-1',
22052204
compute: expect.objectContaining({ cpus: 2, memory_mb: 3072 }) as unknown,
22062205
})
22072206
);
2208-
// Regions are shuffled — check the set
2209-
expect((regionsForkCall[2] as string[]).sort()).toEqual(['eu', 'us']);
2207+
// Regions are reversed from FLY_REGION='us,eu' so EU is tried first on recovery
2208+
expect(regionsForkCall[2]).toEqual(['eu', 'us']);
22102209
// Old volume was deleted
22112210
expect(flyClient.deleteVolume).toHaveBeenCalledWith(expect.anything(), 'vol-1');
22122211
// Machine was retried
@@ -2270,8 +2269,8 @@ describe('start: 412 insufficient resources recovery', () => {
22702269
compute: expect.objectContaining({ cpus: 2, memory_mb: 3072 }) as unknown,
22712270
})
22722271
);
2273-
// Regions are shuffled then deprioritized — check the set
2274-
expect((regionsUpdateCall[2] as string[]).sort()).toEqual(['eu', 'us']);
2272+
// Regions are reversed from FLY_REGION='us,eu' so EU is tried first on recovery
2273+
expect(regionsUpdateCall[2]).toEqual(['eu', 'us']);
22752274
// New machine was created
22762275
expect(storage._store.get('flyMachineId')).toBe('machine-new');
22772276
expect(storage._store.get('flyVolumeId')).toBe('vol-new');

kiloclaw/src/durable-objects/kiloclaw-instance/fly-machines.ts

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ import {
88
DEFAULT_FLY_REGION,
99
STALE_PROVISION_THRESHOLD_MS,
1010
} from '../../config';
11-
import { parseRegions, shuffleRegions, deprioritizeRegion } from '../regions';
11+
import { parseRegions, shuffleRegions } from '../regions';
1212
import { guestFromSize, volumeNameFromSandboxId } from '../machine-config';
1313
import type { InstanceMutableState } from './types';
1414
import { storageUpdate } from './state';
@@ -65,8 +65,7 @@ export async function replaceStrandedVolume(
6565
const oldVolumeId = state.flyVolumeId;
6666
const oldRegion = state.flyRegion;
6767
const hasUserData = state.lastStartedAt !== null;
68-
const allRegions = shuffleRegions(parseRegions(env.FLY_REGION ?? DEFAULT_FLY_REGION));
69-
const regions = deprioritizeRegion(allRegions, oldRegion);
68+
const regions = parseRegions(env.FLY_REGION ?? DEFAULT_FLY_REGION).reverse();
7069
const compute = guestFromSize(state.machineSize);
7170

7271
// Destroy existing machine if any — it's stuck on the constrained host.

0 commit comments

Comments
 (0)