Skip to content

Commit edabd11

Browse files
feat: require_background_worker refinements
Collapses the three-mode require semantics into two, moves the batch-declaration affordance to require instead of get_vars, and renames require to ensure per @dunglas's concern that "require" collides with PHP's language keyword (which takes a path). - frankenphp_require_background_worker -> frankenphp_ensure_background_worker. "Ensure" captures the "make sure this is running, start it if it isn't" semantic without the keyword collision. - Tolerant lazy-start inside frankenphp_handle_request: runtime ensure now behaves the same as non-worker mode (lazy-start + timeout + backoff tolerance). Bootstrap-before-handle_request keeps its fail-fast discipline. This lets processes start only the workers they actually exercise, instead of over-provisioning by pre-ensuring everything that might be needed. - Multi-name ensure: frankenphp_ensure_background_worker now accepts string|array. Batch declaration with a shared deadline, fail-fast on any worker's boot failure in bootstrap mode. get_vars loses the array form, becoming single-name pure read. - globalCtx.Done() wired into ensure's select cases, so in-flight calls unblock cleanly on FrankenPHP shutdown instead of waiting out their timeout. - Fix: markBackgroundReady() is now called on every set_vars, not just the first. Previously sk.readyOnce gated it, leaving isBootingScript stuck at true after a crash-restart. That misclassified subsequent crashes as boot failures and kept the readyWorkers metric decremented.
1 parent 5b84690 commit edabd11

27 files changed

Lines changed: 336 additions & 316 deletions

background_worker.go

Lines changed: 106 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -272,193 +272,156 @@ func getLookup(thread *phpThread) *backgroundWorkerLookup {
272272
return nil
273273
}
274274

275-
// requireMode describes how go_frankenphp_require_background_worker behaves
276-
// based on the caller context.
277-
type requireMode int
278-
279-
const (
280-
// requireModeBootstrap: HTTP worker before frankenphp_handle_request.
281-
// Lazy-starts the worker; fail-fast on boot failure (no backoff tolerance).
282-
requireModeBootstrap requireMode = iota
283-
// requireModeRuntime: HTTP worker inside frankenphp_handle_request.
284-
// Assert-only: worker must already be running; never lazy-starts.
285-
requireModeRuntime
286-
// requireModeNonWorker: non-worker context (classic request-per-process).
287-
// Lazy-starts and tolerates transient boot failures via backoff.
288-
requireModeNonWorker
289-
)
290-
291-
// getRequireMode determines how require_background_worker should behave
292-
// based on the caller's thread handler state.
293-
func getRequireMode(thread *phpThread) requireMode {
294-
if handler, ok := thread.handler.(*workerThread); ok {
295-
if handler.isBootingScript {
296-
return requireModeBootstrap
297-
}
298-
return requireModeRuntime
299-
}
300-
return requireModeNonWorker
275+
// isBootstrapEnsure reports whether the caller is an HTTP worker still
276+
// executing its bootstrap phase (before the first frankenphp_handle_request
277+
// call). Bootstrap is the only context that takes the strict fail-fast path;
278+
// everything else (worker runtime, classic request-per-process) uses the
279+
// tolerant lazy-start path.
280+
func isBootstrapEnsure(thread *phpThread) bool {
281+
handler, ok := thread.handler.(*workerThread)
282+
return ok && handler.isBootingScript
301283
}
302284

303-
// go_frankenphp_require_background_worker declares a dependency on a background
304-
// worker. Semantics depend on the caller context:
285+
// go_frankenphp_ensure_background_worker declares a dependency on one or
286+
// more background workers. The worker is lazy-started if it isn't already
287+
// running, then the call blocks until it has called set_vars() at least once
288+
// (ready state) or the timeout expires.
305289
//
306-
// - HTTP worker bootstrap (before frankenphp_handle_request): lazy-start and
307-
// fail-fast on boot failure (throws as soon as a boot attempt fails).
308-
// - HTTP worker runtime (inside frankenphp_handle_request): assert-only. The
309-
// worker must already be running (either declared with num 1 in Caddyfile
310-
// or previously required during bootstrap). Never lazy-starts.
311-
// - Non-worker mode: lazy-start and tolerate transient boot failures (wait
312-
// for the worker to recover up to the timeout).
290+
// When called from an HTTP worker's bootstrap (before frankenphp_handle_request),
291+
// it uses fail-fast semantics: any boot failure throws immediately with the
292+
// captured details, without waiting for the backoff/retry cycle. This turns
293+
// the bootstrap phase into a strict dependency declaration: a broken dep
294+
// visibly fails the HTTP worker rather than letting it serve degraded traffic.
313295
//
314-
//export go_frankenphp_require_background_worker
315-
func go_frankenphp_require_background_worker(threadIndex C.uintptr_t, name *C.char, nameLen C.size_t, timeoutMs C.int) *C.char {
296+
// Everywhere else (inside frankenphp_handle_request, classic request-per-process),
297+
// it waits tolerantly up to the timeout, letting the restart-with-backoff
298+
// cycle recover from transient boot failures.
299+
//
300+
//export go_frankenphp_ensure_background_worker
301+
func go_frankenphp_ensure_background_worker(threadIndex C.uintptr_t, names **C.char, nameLens *C.size_t, nameCount C.int, timeoutMs C.int) *C.char {
316302
thread := phpThreads[threadIndex]
317303
lookup := getLookup(thread)
318304
if lookup == nil {
319305
return C.CString("no background worker configured in this php_server")
320306
}
321307

322-
goName := C.GoStringN(name, C.int(nameLen))
323-
mode := getRequireMode(thread)
308+
n := int(nameCount)
309+
nameSlice := unsafe.Slice(names, n)
310+
nameLenSlice := unsafe.Slice(nameLens, n)
311+
bootstrap := isBootstrapEnsure(thread)
324312

325-
var sk *backgroundWorkerState
326-
if mode == requireModeRuntime {
327-
// Assert-only: worker must already be running.
328-
registry := lookup.Resolve(goName)
329-
if registry == nil {
330-
return C.CString("background worker " + goName + " is not running; call frankenphp_require_background_worker before frankenphp_handle_request")
331-
}
332-
registry.mu.Lock()
333-
sk = registry.workers[goName]
334-
registry.mu.Unlock()
335-
if sk == nil {
336-
return C.CString("background worker " + goName + " is not running; call frankenphp_require_background_worker before frankenphp_handle_request")
337-
}
338-
} else {
339-
// Bootstrap or non-worker: lazy-start if needed.
340-
if err := startBackgroundWorker(thread, goName); err != nil {
313+
sks := make([]*backgroundWorkerState, n)
314+
goNames := make([]string, n)
315+
for i := 0; i < n; i++ {
316+
goNames[i] = C.GoStringN(nameSlice[i], C.int(nameLenSlice[i]))
317+
318+
if err := startBackgroundWorker(thread, goNames[i]); err != nil {
341319
return C.CString(err.Error())
342320
}
343-
registry := lookup.Resolve(goName)
321+
registry := lookup.Resolve(goNames[i])
344322
if registry == nil {
345-
return C.CString("background worker not found: " + goName)
323+
return C.CString("background worker not found: " + goNames[i])
346324
}
347325
registry.mu.Lock()
348-
sk = registry.workers[goName]
326+
sks[i] = registry.workers[goNames[i]]
349327
registry.mu.Unlock()
350-
if sk == nil {
351-
return C.CString("background worker not found: " + goName)
328+
if sks[i] == nil {
329+
return C.CString("background worker not found: " + goNames[i])
352330
}
353331
}
354332

333+
// Shared deadline across all workers.
355334
deadline := time.After(time.Duration(timeoutMs) * time.Millisecond)
356-
if mode == requireModeBootstrap {
357-
// Fail-fast: watch for bootFailure alongside ready/deadline.
335+
if bootstrap {
336+
// Fail-fast: watch for bootFailure on any worker alongside ready/deadline.
358337
ticker := time.NewTicker(50 * time.Millisecond)
359338
defer ticker.Stop()
360-
for {
361-
select {
362-
case <-sk.ready:
363-
return nil
364-
case <-deadline:
365-
return C.CString(formatBackgroundWorkerTimeoutError(goName, sk))
366-
case <-ticker.C:
367-
if sk.bootFailure.Load() != nil {
368-
return C.CString(formatBackgroundWorkerTimeoutError(goName, sk))
339+
for i, sk := range sks {
340+
wait:
341+
for {
342+
select {
343+
case <-sk.ready:
344+
break wait
345+
case <-deadline:
346+
return C.CString(formatBackgroundWorkerTimeoutError(goNames[i], sk))
347+
case <-globalCtx.Done():
348+
return C.CString("frankenphp is shutting down")
349+
case <-ticker.C:
350+
if sk.bootFailure.Load() != nil {
351+
return C.CString(formatBackgroundWorkerTimeoutError(goNames[i], sk))
352+
}
369353
}
370354
}
371355
}
356+
return nil
372357
}
373358

374-
// Runtime or non-worker: wait for ready or timeout.
375-
select {
376-
case <-sk.ready:
377-
return nil
378-
case <-deadline:
379-
return C.CString(formatBackgroundWorkerTimeoutError(goName, sk))
359+
// Tolerant: wait for each worker's ready, shared deadline.
360+
for i, sk := range sks {
361+
select {
362+
case <-sk.ready:
363+
// ready
364+
case <-deadline:
365+
return C.CString(formatBackgroundWorkerTimeoutError(goNames[i], sk))
366+
case <-globalCtx.Done():
367+
return C.CString("frankenphp is shutting down")
368+
}
380369
}
370+
return nil
381371
}
382372

383-
// go_frankenphp_get_vars is a pure read: looks up already-running workers
384-
// and copies their vars into the return value. It never starts a worker or
385-
// waits for one to become ready. Callers should have ensured workers are
386-
// running via frankenphp_require_background_worker first.
373+
// go_frankenphp_get_vars is a pure read: looks up an already-running worker
374+
// and copies its vars into the return value. It never starts a worker or
375+
// waits for one to become ready. Callers should have ensured the worker is
376+
// running via frankenphp_ensure_background_worker first.
387377
//
388-
// callerVersions/outVersions: if callerVersions is non-nil and all versions match,
389-
// the copy is skipped entirely. outVersions receives current versions.
378+
// callerVersion/outVersion: if callerVersion is non-nil and the version matches,
379+
// the copy is skipped entirely. outVersion receives the current version.
390380
//
391381
//export go_frankenphp_get_vars
392-
func go_frankenphp_get_vars(threadIndex C.uintptr_t, names **C.char, nameLens *C.size_t, nameCount C.int, returnValue *C.zval, callerVersions *C.uint64_t, outVersions *C.uint64_t) *C.char {
382+
func go_frankenphp_get_vars(threadIndex C.uintptr_t, name *C.char, nameLen C.size_t, returnValue *C.zval, callerVersion *C.uint64_t, outVersion *C.uint64_t) *C.char {
393383
thread := phpThreads[threadIndex]
394384
lookup := getLookup(thread)
395385
if lookup == nil {
396386
return C.CString("no background worker configured in this php_server")
397387
}
398388

399-
n := int(nameCount)
400-
nameSlice := unsafe.Slice(names, n)
401-
nameLenSlice := unsafe.Slice(nameLens, n)
402-
403-
sks := make([]*backgroundWorkerState, n)
404-
for i := 0; i < n; i++ {
405-
goName := C.GoStringN(nameSlice[i], C.int(nameLenSlice[i]))
406-
407-
registry := lookup.Resolve(goName)
408-
if registry == nil {
409-
return C.CString("background worker not running: " + goName + " (call frankenphp_require_background_worker first)")
410-
}
411-
registry.mu.Lock()
412-
sks[i] = registry.workers[goName]
413-
registry.mu.Unlock()
414-
if sks[i] == nil {
415-
return C.CString("background worker not running: " + goName + " (call frankenphp_require_background_worker first)")
416-
}
389+
goName := C.GoStringN(name, C.int(nameLen))
417390

418-
// Must have reached ready state (i.e. called set_vars at least once).
419-
select {
420-
case <-sks[i].ready:
421-
default:
422-
return C.CString("background worker not ready: " + goName + " (no set_vars call yet)")
423-
}
391+
registry := lookup.Resolve(goName)
392+
if registry == nil {
393+
return C.CString("background worker not running: " + goName + " (call frankenphp_ensure_background_worker first)")
424394
}
425-
426-
// Fast path: if all caller versions match, skip lock + copy entirely.
427-
if callerVersions != nil && outVersions != nil {
428-
callerVSlice := unsafe.Slice(callerVersions, n)
429-
outVSlice := unsafe.Slice(outVersions, n)
430-
allMatch := true
431-
for i, sk := range sks {
432-
v := sk.varsVersion.Load()
433-
outVSlice[i] = C.uint64_t(v)
434-
if uint64(callerVSlice[i]) != v {
435-
allMatch = false
436-
}
437-
}
438-
if allMatch {
439-
return nil
440-
}
395+
registry.mu.Lock()
396+
sk := registry.workers[goName]
397+
registry.mu.Unlock()
398+
if sk == nil {
399+
return C.CString("background worker not running: " + goName + " (call frankenphp_ensure_background_worker first)")
441400
}
442401

443-
// Take all read locks, collect pointers, copy via C helper, then release
444-
ptrs := make([]unsafe.Pointer, n)
445-
for i, sk := range sks {
446-
sk.mu.RLock()
447-
ptrs[i] = sk.varsPtr
402+
// Must have reached ready state (i.e. called set_vars at least once).
403+
select {
404+
case <-sk.ready:
405+
default:
406+
return C.CString("background worker not ready: " + goName + " (no set_vars call yet)")
448407
}
449408

450-
C.frankenphp_worker_copy_vars(returnValue, C.int(n), names, nameLens, (*unsafe.Pointer)(unsafe.Pointer(&ptrs[0])))
451-
452-
if outVersions != nil {
453-
outVSlice := unsafe.Slice(outVersions, n)
454-
for i, sk := range sks {
455-
outVSlice[i] = C.uint64_t(sk.varsVersion.Load())
409+
// Fast path: caller-version match skips the copy entirely.
410+
if callerVersion != nil && outVersion != nil {
411+
v := sk.varsVersion.Load()
412+
*outVersion = C.uint64_t(v)
413+
if uint64(*callerVersion) == v {
414+
return nil
456415
}
457416
}
458417

459-
for _, sk := range sks {
460-
sk.mu.RUnlock()
418+
sk.mu.RLock()
419+
ptr := sk.varsPtr
420+
C.frankenphp_worker_copy_vars(returnValue, 1, &name, &nameLen, (*unsafe.Pointer)(unsafe.Pointer(&ptr)))
421+
if outVersion != nil {
422+
*outVersion = C.uint64_t(sk.varsVersion.Load())
461423
}
424+
sk.mu.RUnlock()
462425

463426
return nil
464427
}
@@ -498,10 +461,15 @@ func go_frankenphp_set_vars(threadIndex C.uintptr_t, varsPtr unsafe.Pointer, old
498461
sk.varsVersion.Add(1)
499462
sk.mu.Unlock()
500463

464+
// Close the ready channel at-most-once; a second close would panic.
501465
sk.readyOnce.Do(func() {
502-
bgHandler.markBackgroundReady()
503466
close(sk.ready)
504467
})
468+
// markBackgroundReady is idempotent (guarded by isBootingScript). It must
469+
// run on every set_vars because setupScript sets isBootingScript back to
470+
// true on each (re)boot, and the next set_vars has to flip it to false
471+
// again so subsequent crashes are classified as crashes, not boot failures.
472+
bgHandler.markBackgroundReady()
505473

506474
return nil
507475
}

background_worker_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,54 @@ func TestBackgroundWorkerSetVarsMarksWorkerReady(t *testing.T) {
9494
assert.Equal(t, 1, testMetrics.readyCalls)
9595
}
9696

97+
func TestBackgroundWorkerRestartReReachesReady(t *testing.T) {
98+
// After a crash-restart cycle, setupScript flips isBootingScript back to
99+
// true and the next set_vars must flip it back to false. Otherwise any
100+
// subsequent crash would be misclassified as a boot failure and the
101+
// readyWorkers metric would stay decremented.
102+
originalMetrics := metrics
103+
testMetrics := &backgroundWorkerTestMetrics{}
104+
metrics = testMetrics
105+
t.Cleanup(func() {
106+
metrics = originalMetrics
107+
})
108+
109+
handler := &backgroundWorkerThread{
110+
thread: newPHPThread(0),
111+
worker: &worker{
112+
name: "background-worker",
113+
fileName: "background-worker.php",
114+
maxConsecutiveFailures: -1,
115+
backgroundWorker: &backgroundWorkerState{ready: make(chan struct{})},
116+
},
117+
isBootingScript: true,
118+
}
119+
120+
// First boot reaches ready.
121+
handler.markBackgroundReady()
122+
assert.False(t, handler.isBootingScript)
123+
assert.Equal(t, 1, testMetrics.readyCalls)
124+
125+
// Worker crashes after ready: exit status != 0, isBootingScript == false.
126+
handler.afterScriptExecution(1)
127+
require.Len(t, testMetrics.stopCalls, 1)
128+
assert.Equal(t, StopReason(StopReasonCrash), testMetrics.stopCalls[0])
129+
130+
// Restart: setupScript would set isBootingScript = true again.
131+
handler.isBootingScript = true
132+
133+
// Next set_vars call must flip it back to false and increment ReadyWorker.
134+
handler.markBackgroundReady()
135+
assert.False(t, handler.isBootingScript)
136+
assert.Equal(t, 2, testMetrics.readyCalls)
137+
138+
// A second crash at this point must still be classified as Crash, not BootFailure.
139+
testMetrics.stopCalls = nil
140+
handler.afterScriptExecution(1)
141+
require.Len(t, testMetrics.stopCalls, 1)
142+
assert.Equal(t, StopReason(StopReasonCrash), testMetrics.stopCalls[0])
143+
}
144+
97145
func TestBackgroundWorkerBootFailureStaysBootFailureUntilReady(t *testing.T) {
98146
originalMetrics := metrics
99147
testMetrics := &backgroundWorkerTestMetrics{}

cli_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ func TestBackgroundWorkerFunctionsHiddenInCLI(t *testing.T) {
4242
assert.NoError(t, err)
4343

4444
out := string(stdoutStderr)
45-
assert.Contains(t, out, "frankenphp_require_background_worker:hidden")
45+
assert.Contains(t, out, "frankenphp_ensure_background_worker:hidden")
4646
assert.Contains(t, out, "frankenphp_set_vars:hidden")
4747
assert.Contains(t, out, "frankenphp_get_vars:hidden")
4848
assert.Contains(t, out, "frankenphp_get_worker_handle:hidden")

0 commit comments

Comments
 (0)