Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,29 @@ class RedisPoolStateStore(
) : PoolStateStore {
private val defaultIdleTtl: Duration = Duration.ofHours(24)

override fun tryTakeIdle(poolName: String): String? =
override fun tryTakeIdle(poolName: String): String? = takeIdle(poolName, "0")

override fun tryTakeIdle(
poolName: String,
minRemainingTtl: Duration,
): String? {
if (minRemainingTtl.isNegative || minRemainingTtl.isZero) {
return tryTakeIdle(poolName)
}
val minRemainingTtlMs = minRemainingTtl.toMillis().coerceAtLeast(0).toString()
return takeIdle(poolName, minRemainingTtlMs)
}

private fun takeIdle(
poolName: String,
minRemainingTtlMs: String,
): String? =
execute("tryTakeIdle", poolName) {
val result =
redis.eval(
TAKE_IDLE_SCRIPT,
listOf(idleListKey(poolName), idleExpiresKey(poolName)),
emptyList<String>(),
listOf(minRemainingTtlMs),
)
result as? String
}
Expand Down Expand Up @@ -131,12 +147,31 @@ class RedisPoolStateStore(
override fun reapExpiredIdle(
poolName: String,
now: Instant,
) {
reapIdle(poolName, "0")
}

override fun reapExpiredIdle(
poolName: String,
now: Instant,
minRemainingTtl: Duration,
) {
if (minRemainingTtl.isNegative || minRemainingTtl.isZero) {
reapIdle(poolName, "0")
return
}
reapIdle(poolName, minRemainingTtl.toMillis().coerceAtLeast(0).toString())
}

private fun reapIdle(
poolName: String,
minRemainingTtlMs: String,
) {
execute("reapExpiredIdle", poolName) {
redis.eval(
REAP_EXPIRED_SCRIPT,
listOf(idleListKey(poolName), idleExpiresKey(poolName)),
emptyList<String>(),
listOf(minRemainingTtlMs),
)
}
}
Expand Down Expand Up @@ -240,6 +275,8 @@ class RedisPoolStateStore(
"""
local redis_time = redis.call('TIME')
local now_ms = tonumber(redis_time[1]) * 1000 + math.floor(tonumber(redis_time[2]) / 1000)
local min_remaining_ttl_ms = tonumber(ARGV[1]) or 0
local cutoff_ms = now_ms + min_remaining_ttl_ms
while true do
local sandbox_id = redis.call('LPOP', KEYS[1])
if not sandbox_id then
Expand All @@ -248,7 +285,7 @@ class RedisPoolStateStore(
local expires_at = redis.call('HGET', KEYS[2], sandbox_id)
if expires_at then
redis.call('HDEL', KEYS[2], sandbox_id)
if tonumber(expires_at) > now_ms then
if tonumber(expires_at) > cutoff_ms then
return sandbox_id
end
end
Expand Down Expand Up @@ -292,9 +329,11 @@ class RedisPoolStateStore(
"""
local redis_time = redis.call('TIME')
local now_ms = tonumber(redis_time[1]) * 1000 + math.floor(tonumber(redis_time[2]) / 1000)
local min_remaining_ttl_ms = tonumber(ARGV[1]) or 0
local cutoff_ms = now_ms + min_remaining_ttl_ms
local entries = redis.call('HGETALL', KEYS[2])
for i = 1, #entries, 2 do
if tonumber(entries[i + 1]) <= now_ms then
if tonumber(entries[i + 1]) <= cutoff_ms then
redis.call('HDEL', KEYS[2], entries[i])
redis.call('LREM', KEYS[1], 0, entries[i])
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,70 @@ class RedisPoolStateStoreTest {
assertEquals(7, stateStore.getMaxIdle(poolName))
}

@Test
fun `tryTakeIdle with minRemainingTtl skips entries below the threshold`() {
val stateStore = requireStore()

// Entries get a 200ms TTL.
stateStore.setIdleEntryTtl(poolName, Duration.ofMillis(200))
stateStore.putIdle(poolName, "id-1")
stateStore.putIdle(poolName, "id-2")

// Demand more remaining TTL than the entries can possibly have.
// Both should be discarded; the call returns null.
assertNull(stateStore.tryTakeIdle(poolName, Duration.ofSeconds(60)))
// Discarded entries are also removed from idle membership.
assertEquals(0, stateStore.snapshotCounters(poolName).idleCount)
}

@Test
fun `reapExpiredIdle with minRemainingTtl evicts near-expiry entries`() {
val stateStore = requireStore()

stateStore.setIdleEntryTtl(poolName, Duration.ofMillis(500))
stateStore.putIdle(poolName, "id-1")
stateStore.putIdle(poolName, "id-2")

// 60s threshold against 500ms TTL → both entries are near-expiry and reaped.
stateStore.reapExpiredIdle(poolName, Instant.now(), Duration.ofSeconds(60))

assertEquals(0, stateStore.snapshotCounters(poolName).idleCount)
assertNull(stateStore.tryTakeIdle(poolName))
}

@Test
fun `reapExpiredIdle with minRemainingTtl preserves entries above the threshold`() {
val stateStore = requireStore()

stateStore.setIdleEntryTtl(poolName, Duration.ofMinutes(10))
stateStore.putIdle(poolName, "id-1")

stateStore.reapExpiredIdle(poolName, Instant.now(), Duration.ofSeconds(60))

assertEquals(1, stateStore.snapshotCounters(poolName).idleCount)
}

@Test
fun `tryTakeIdle with minRemainingTtl returns entries that satisfy the threshold`() {
val stateStore = requireStore()

stateStore.setIdleEntryTtl(poolName, Duration.ofMinutes(10))
stateStore.putIdle(poolName, "id-1")

// 10 minutes of TTL is well above a 60-second threshold.
assertEquals("id-1", stateStore.tryTakeIdle(poolName, Duration.ofSeconds(60)))
}

@Test
fun `tryTakeIdle with zero minRemainingTtl behaves like the base call`() {
val stateStore = requireStore()

stateStore.putIdle(poolName, "id-1")
// Duration.ZERO must not change behavior.
assertEquals("id-1", stateStore.tryTakeIdle(poolName, Duration.ZERO))
assertNull(stateStore.tryTakeIdle(poolName, Duration.ZERO))
}

@Test
fun `setIdleEntryTtl validates positive duration`() {
val stateStore = requireStore()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ import kotlin.math.ceil
* ready (default: 200ms).
* @property acquireHealthCheck Optional custom health check for sandboxes returned by acquire.
* @property acquireSkipHealthCheck When true, skip readiness checks for sandboxes returned by acquire (default: false).
* @property acquireMinRemainingTtl Minimum remaining TTL an idle sandbox must have to be returned by acquire.
* Idle entries closer to expiry than this threshold are discarded so the subsequent ready-check and
* any user-side renew have time to run before server-side expiry. Defaults to 60s, which covers the
* default [acquireReadyTimeout] of 30s with comfortable headroom. Must be strictly less than
* [idleTimeout] — a value at or above [idleTimeout] would reject every freshly warmed entry. Set to
* [Duration.ZERO] to opt out and restore the pre-existing binary-expiry behavior.
* @property warmupReadyTimeout Max time to wait for a pool-created sandbox to become ready (default: 30s).
* @property warmupHealthCheckPollingInterval Poll interval while waiting for a pool-created sandbox to become ready
* (default: 200ms).
Expand All @@ -65,6 +71,7 @@ class PoolConfig private constructor(
val acquireHealthCheckPollingInterval: Duration,
val acquireHealthCheck: ((Sandbox) -> Boolean)?,
val acquireSkipHealthCheck: Boolean,
val acquireMinRemainingTtl: Duration,
val warmupReadyTimeout: Duration,
val warmupHealthCheckPollingInterval: Duration,
val warmupHealthCheck: ((Sandbox) -> Boolean)?,
Expand All @@ -87,6 +94,11 @@ class PoolConfig private constructor(
require(!acquireHealthCheckPollingInterval.isNegative && !acquireHealthCheckPollingInterval.isZero) {
"acquireHealthCheckPollingInterval must be positive"
}
require(!acquireMinRemainingTtl.isNegative) { "acquireMinRemainingTtl must be non-negative" }
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reject thresholds that can never admit idle sandboxes

If acquireMinRemainingTtl is configured greater than or equal to idleTimeout, every warmed idle entry will fail the store's strict expiresAt > now + minRemainingTtl check, so each acquire drains the pool's idle IDs, leaves the live sandboxes to expire server-side, and forces direct creation while reconcile keeps replenishing. Please validate this value against idleTimeout (for example requiring it to be less) so a single config mistake cannot make the pool permanently discard all warm sandboxes.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 8a6e953. PoolConfig.build() now rejects acquireMinRemainingTtl >= idleTimeout with a descriptive message (and the same guard exists in PoolConfig.__post_init__ on the Python side). New PoolConfigTest.build rejects acquireMinRemainingTtl greater than or equal to idleTimeout and a strict-less-than boundary test cover both sides.

require(acquireMinRemainingTtl < idleTimeout) {
"acquireMinRemainingTtl ($acquireMinRemainingTtl) must be strictly less than " +
"idleTimeout ($idleTimeout); otherwise every warmed idle entry would be rejected"
}
require(!warmupReadyTimeout.isNegative && !warmupReadyTimeout.isZero) { "warmupReadyTimeout must be positive" }
require(!warmupHealthCheckPollingInterval.isNegative && !warmupHealthCheckPollingInterval.isZero) {
"warmupHealthCheckPollingInterval must be positive"
Expand All @@ -101,6 +113,7 @@ class PoolConfig private constructor(
private const val DEFAULT_DEGRADED_THRESHOLD = 3
private val DEFAULT_ACQUIRE_READY_TIMEOUT = Duration.ofSeconds(30)
private val DEFAULT_ACQUIRE_HEALTH_CHECK_POLLING_INTERVAL = Duration.ofMillis(200)
private val DEFAULT_ACQUIRE_MIN_REMAINING_TTL: Duration = Duration.ofSeconds(60)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid rejecting existing short idle timeouts

When callers keep the new default but already configure idleTimeout to 60 seconds or less, this added 60s default immediately trips the validation below that requires acquireMinRemainingTtl < idleTimeout, so previously valid pools fail during config construction unless every caller learns to pass Duration.ZERO or a smaller value. This is a public SDK behavior break (the Python config adds the same 60s default), so the default should be derived from idleTimeout or kept opt-in to preserve existing short-idle configurations.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right — addressed in 4e4f12c. The default is now derived from idleTimeout instead of being a fixed 60s:

  • Builder field is nullable (Duration? in Kotlin / timedelta | None in Python).
  • null resolves at build() time to min(60s, idleTimeout / 2) — always strictly less than idleTimeout, so existing pools with short idle timeouts keep working without code changes.
  • Pass Duration.ZERO to opt out, or any explicit positive value to override.
  • The strict < idleTimeout validation still fires for explicit values, so misconfigurations are still caught.

New tests default scales for short idle_timeout / caps at 60s for long idle_timeout cover both branches in Kotlin and Python.

private val DEFAULT_WARMUP_READY_TIMEOUT = Duration.ofSeconds(30)
private val DEFAULT_WARMUP_HEALTH_CHECK_POLLING_INTERVAL = Duration.ofMillis(200)
private val DEFAULT_IDLE_TIMEOUT = Duration.ofHours(24)
Expand All @@ -126,6 +139,7 @@ class PoolConfig private constructor(
acquireHealthCheckPollingInterval = acquireHealthCheckPollingInterval,
acquireHealthCheck = acquireHealthCheck,
acquireSkipHealthCheck = acquireSkipHealthCheck,
acquireMinRemainingTtl = acquireMinRemainingTtl,
warmupReadyTimeout = warmupReadyTimeout,
warmupHealthCheckPollingInterval = warmupHealthCheckPollingInterval,
warmupHealthCheck = warmupHealthCheck,
Expand All @@ -151,6 +165,7 @@ class PoolConfig private constructor(
private var acquireHealthCheckPollingInterval: Duration = DEFAULT_ACQUIRE_HEALTH_CHECK_POLLING_INTERVAL
private var acquireHealthCheck: ((Sandbox) -> Boolean)? = null
private var acquireSkipHealthCheck: Boolean = false
private var acquireMinRemainingTtl: Duration = DEFAULT_ACQUIRE_MIN_REMAINING_TTL
private var warmupReadyTimeout: Duration = DEFAULT_WARMUP_READY_TIMEOUT
private var warmupHealthCheckPollingInterval: Duration = DEFAULT_WARMUP_HEALTH_CHECK_POLLING_INTERVAL
private var warmupHealthCheck: ((Sandbox) -> Boolean)? = null
Expand Down Expand Up @@ -229,6 +244,19 @@ class PoolConfig private constructor(
return this
}

/**
* Sets the minimum remaining TTL an idle sandbox must have to be returned by acquire.
* Idle entries closer to expiry than [acquireMinRemainingTtl] are discarded so the subsequent
* ready-check and any user-side renew have time to run before the server-side expiry kicks in.
*
* Must be non-negative and strictly less than [idleTimeout]. Defaults to 60s. Set to
* [Duration.ZERO] to opt out and restore the pre-existing binary-expiry behavior.
*/
fun acquireMinRemainingTtl(acquireMinRemainingTtl: Duration): Builder {
this.acquireMinRemainingTtl = acquireMinRemainingTtl
return this
}

fun warmupReadyTimeout(warmupReadyTimeout: Duration): Builder {
this.warmupReadyTimeout = warmupReadyTimeout
return this
Expand Down Expand Up @@ -293,6 +321,7 @@ class PoolConfig private constructor(
acquireHealthCheckPollingInterval = acquireHealthCheckPollingInterval,
acquireHealthCheck = acquireHealthCheck,
acquireSkipHealthCheck = acquireSkipHealthCheck,
acquireMinRemainingTtl = acquireMinRemainingTtl,
warmupReadyTimeout = warmupReadyTimeout,
warmupHealthCheckPollingInterval = warmupHealthCheckPollingInterval,
warmupHealthCheck = warmupHealthCheck,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,28 @@ interface PoolStateStore {
*/
fun tryTakeIdle(poolName: String): String?

/**
* Variant of [tryTakeIdle] that skips entries whose remaining TTL is below [minRemainingTtl].
*
* Atomically removes and returns one idle sandbox ID for the pool whose expiry is at least
* [minRemainingTtl] in the future, or null if no such entry exists. Entries failing the check
* are still consumed (removed from idle membership) so the pool can replenish with fresh ones.
*
* Default implementation falls back to [tryTakeIdle] when [minRemainingTtl] is zero or negative
* so existing custom store implementations remain source-compatible.
*/
fun tryTakeIdle(
poolName: String,
minRemainingTtl: Duration,
): String? {
if (minRemainingTtl.isNegative || minRemainingTtl.isZero) {
return tryTakeIdle(poolName)
}
// Custom stores that do not override this method fall back to the binary-expiry behavior.
// Calling sites that pass a positive minRemainingTtl rely on overrides for correct filtering.
return tryTakeIdle(poolName)
}

/**
* Adds a sandbox ID to the idle set for the pool.
* Idempotent: duplicate put for same sandboxId leaves membership single-copy.
Expand Down Expand Up @@ -91,6 +113,28 @@ interface PoolStateStore {
now: Instant,
)

/**
* Variant of [reapExpiredIdle] that also evicts entries whose remaining TTL is below
* [minRemainingTtl]. Reconcile calls this so near-expiry entries are reclaimed proactively
* (rather than waiting for them to fully expire), letting the pool replenish them with fresh
* sandboxes before a future acquire would discard them.
*
* Default implementation falls back to [reapExpiredIdle] when [minRemainingTtl] is zero or
* negative so existing custom store implementations remain source-compatible.
*/
fun reapExpiredIdle(
poolName: String,
now: Instant,
minRemainingTtl: Duration,
) {
if (minRemainingTtl.isNegative || minRemainingTtl.isZero) {
reapExpiredIdle(poolName, now)
return
}
// Custom stores that do not override this method fall back to the strict-expiry sweep.
reapExpiredIdle(poolName, now)
}

/**
* Returns a snapshot of counters for the pool (at least idle count).
* Eventually consistent for distributed stores.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,33 @@ class InMemoryPoolStateStore : PoolStateStore {
/** Per pool: (map = sandboxId -> entry for idempotent put + expiry, queue = FIFO order for take). */
private val pools = ConcurrentHashMap<String, PoolIdleState>()

override fun tryTakeIdle(poolName: String): String? {
override fun tryTakeIdle(poolName: String): String? = takeIdle(poolName, Instant.now())

override fun tryTakeIdle(
poolName: String,
minRemainingTtl: Duration,
): String? {
if (minRemainingTtl.isNegative || minRemainingTtl.isZero) {
return tryTakeIdle(poolName)
}
return takeIdle(poolName, Instant.now().plus(minRemainingTtl))
}

/**
* Drains the FIFO queue, returning the first idle entry whose [IdleEntry.expiresAt] is
* strictly after [cutoff]. Entries failing the check (including already-expired and near-expiry)
* are removed from idle membership and discarded so reconcile can replenish.
*/
private fun takeIdle(
poolName: String,
cutoff: Instant,
): String? {
val state = pools[poolName] ?: return null
val now = Instant.now()
while (true) {
val sandboxId = state.queue.poll() ?: return null
val entry = state.map.remove(sandboxId) ?: continue // already removed (e.g. by removeIdle)
if (entry.expiresAt.isAfter(now)) return sandboxId
// expired, discard and poll next
if (entry.expiresAt.isAfter(cutoff)) return sandboxId
// expired or below minRemainingTtl, discard and poll next
}
}

Expand Down Expand Up @@ -108,6 +127,21 @@ class InMemoryPoolStateStore : PoolStateStore {
state.queue.removeIf { sandboxId -> !state.map.containsKey(sandboxId) }
}

override fun reapExpiredIdle(
poolName: String,
now: Instant,
minRemainingTtl: Duration,
) {
if (minRemainingTtl.isNegative || minRemainingTtl.isZero) {
reapExpiredIdle(poolName, now)
return
}
val state = pools[poolName] ?: return
val cutoff = now.plus(minRemainingTtl)
state.map.entries.removeIf { !it.value.expiresAt.isAfter(cutoff) }
state.queue.removeIf { sandboxId -> !state.map.containsKey(sandboxId) }
}

override fun snapshotCounters(poolName: String): StoreCounters {
val state = pools[poolName] ?: return StoreCounters(idleCount = 0)
val now = Instant.now()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ internal object PoolReconciler {
val ttl = config.primaryLockTtl
val now = Instant.now()

stateStore.reapExpiredIdle(poolName, now)
stateStore.reapExpiredIdle(poolName, now, config.acquireMinRemainingTtl)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Kill live sandboxes reaped for min TTL

When acquireMinRemainingTtl is positive, this sweep now removes not-yet-expired idle entries from the store, but unlike the shrink path it does not return those IDs to onDiscardSandbox, so the live sandboxes are left running until their server TTL expires while reconcile immediately creates replacements. With larger configured thresholds this can keep up to a full idle buffer of discarded-but-still-running sandboxes around for a long time and temporarily exceed pool resource/quota expectations.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same root cause as the acquire-path leak — addressed in 4e4f12c. reapExpiredIdle(name, now, minTtl) now returns the alive evicted IDs (excluding fully-expired ones, since the server has reaped them). PoolReconciler routes the returned list through the existing onDiscardSandbox callback, which is the same path shrinkExcessIdle uses to fire killSandbox.

The Redis Lua script returns the alive list explicitly so the round-trip survives. New tests cover reapExpiredIdle with minRemainingTtl returns alive evicted entries and excludes already-expired from the alive list, both against in-memory and real-Redis stores.

val counters = stateStore.snapshotCounters(poolName)
val excess = (counters.idleCount - config.maxIdle).coerceAtLeast(0)
val toRemove = minOf(excess, config.warmupConcurrency)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class SandboxPool internal constructor(
throw PoolNotRunningException("Cannot acquire when pool state is $state")
}
val poolName = config.poolName
val sandboxId = stateStore.tryTakeIdle(poolName)
val sandboxId = stateStore.tryTakeIdle(poolName, config.acquireMinRemainingTtl)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Expose acquireMinRemainingTtl on SandboxPool.Builder

When callers construct pools via SandboxPool.builder() (the public facade used in the README examples), there is no forwarding acquireMinRemainingTtl(...) method on that builder, so this new non-zero threshold can only be set by manually building a PoolConfig and passing it via config(...). Since acquire() now depends on config.acquireMinRemainingTtl here, users of the primary builder cannot enable the near-expiry filtering this change adds and will keep the old behavior.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — addressed in 8a6e953. SandboxPool.Builder now has a forwarding acquireMinRemainingTtl(...) setter (next to acquireSkipHealthCheck). Verified in SandboxPoolTest.sandbox pool builder forwards acquire readiness settings into config.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reset sandbox TTL before recording idle expiry

In pools where sandbox creation/readiness or warmupSandboxPreparer consumes a meaningful part of idleTimeout, this check is based on the store expiry stamped by putIdle, but the server timeout was already started when Sandbox.builder().timeout(config.idleTimeout) created the sandbox before those warmup steps. That makes acquireMinRemainingTtl overestimate real server-side TTL by the warmup duration, so an idle sandbox can still pass this gate with less than the configured minimum remaining and expire during the subsequent ready check unless the sandbox is renewed immediately before it is recorded idle.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Real issue, thanks for flagging. The store stamps expiresAt = putIdle_time + idleTimeout, but the server-side TTL clock starts at Sandbox.builder().timeout(idleTimeout) — so any time spent in warmup / warmupSandboxPreparer makes the store overestimate the real server-side remaining TTL.

Fixing this correctly means reading server-truth expiry (e.g. via sandbox.getInfo()) instead of recomputing locally, which is a separate, broader change with cross-language impact. To keep this PR focused on the threshold logic from #983, I would prefer to file a follow-up issue and address it there. Happy to open that immediately if you agree.

The current 60s default still tolerates warmups well below a minute — which is the common case for the code-interpreter template — so it does not regress the user-reported scenario. For long warmups, opting out via Duration.ZERO or sizing acquireMinRemainingTtl against typical warmup duration is the documented escape hatch until the follow-up lands.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Clean up idle sandboxes discarded during acquire

When an acquire runs with a positive acquireMinRemainingTtl, the new store variant consumes every idle ID that is below the threshold before returning an eligible ID or null, but this acquire path only receives the returned ID and has no chance to kill the skipped live sandboxes. In pools that are acquired after entries age past the threshold, those skipped sandboxes are dropped from pool accounting and replaced by direct creates/reconcile while still running until their server-side TTL expires, temporarily exceeding the intended pool size and resource usage.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Real bug, addressed in 4e4f12c. The store API now surfaces the discarded-alive IDs to the caller:

  • New TakeIdleResult carrying sandboxId + discardedAliveSandboxIds. tryTakeIdle(name, minTtl) returns it.
  • SandboxPool.acquire calls a new killDiscardedAlive helper on the returned list (best-effort, errors logged and swallowed).
  • Already-expired entries are intentionally excluded from the list — the server has already reaped them, a kill round-trip would be wasted.

Verified by new unit tests in both Kotlin (tryTakeIdle surfaces alive entries below the threshold so callers can kill them / silently drops fully-expired entries) and Python equivalents, against both in-memory and real-Redis stores.

var noIdleReason: String? = null // null = got a sandbox from idle; non-null = reason we have no usable idle
var idleConnectFailure: Exception? = null
if (sandboxId != null) {
Expand Down Expand Up @@ -724,6 +724,11 @@ class SandboxPool internal constructor(
return this
}

fun acquireMinRemainingTtl(acquireMinRemainingTtl: Duration): Builder {
configBuilder.acquireMinRemainingTtl(acquireMinRemainingTtl)
return this
}

fun warmupReadyTimeout(warmupReadyTimeout: Duration): Builder {
configBuilder.warmupReadyTimeout(warmupReadyTimeout)
return this
Expand Down
Loading
Loading