fix: address Copilot review — defer heartbeat cleanup, tighten tests, fix CORS/parallel test gaps

github-actions[bot] · github-actions[bot] · commit 762cfd304f2a · 2026-04-22T15:56:39.000-07:00
- Server.swift: add defer-based heartbeat cleanup in both handleChatStreaming and
  handleTextStreaming so heartbeatTask is always cancelled on any exit path
  (client disconnect during prefill no longer leaks the heartbeat task)
- ServerSSETests.swift: add missing import Foundation for Data/JSONSerialization
- test-server.sh Test 32: fail on empty curl response instead of false-passing
- test-server.sh Test 33: use conditional curl; fail if request fails entirely
- test-server.sh Test 34: redirect CORS preflight to CORS_PORT (--cors server)
  instead of the main server which has no CORS middleware
- test-server.sh Test 35: spin up a dedicated --parallel 2 server so concurrent
  requests actually overlap and stress the global hook under real parallelism
- test-opencode.sh: capture opencode exit code separately; classify parse errors
  vs acceptable non-zero exits to prevent false passes
diff --git a/Sources/SwiftLM/Server.swift b/Sources/SwiftLM/Server.swift
@@ -1432,6 +1432,13 @@ func handleChatStreaming(
         var stopped = false
         var firstToken = true
         var tracker = ThinkingStateTracker()
+        // Unconditional cleanup: guarantees heartbeat is cancelled on ALL exit paths
+        // (normal completion, client disconnect, or task cancellation during prefill).
+        defer {
+            heartbeatTask?.cancel()
+            heartbeatTask = nil
+            activePrefillProgressHook = nil
+        }
         
         // ── JSON mode streaming: buffer early tokens to strip hallucinated prefixes ──
         var jsonBuffering = jsonMode
@@ -1854,6 +1861,13 @@ func handleTextStreaming(
         var fullText = ""
         var stopped = false
         var firstToken = true
+        // Unconditional cleanup: guarantees heartbeat is cancelled on ALL exit paths
+        // (normal completion, client disconnect, or task cancellation during prefill).
+        defer {
+            heartbeatTask?.cancel()
+            heartbeatTask = nil
+            activePrefillProgressHook = nil
+        }
         for await generation in stream {
             if stopped { break }
             switch generation {
diff --git a/tests/SwiftLMTests/ServerSSETests.swift b/tests/SwiftLMTests/ServerSSETests.swift
@@ -1,4 +1,5 @@
 import XCTest
+import Foundation
 @testable import SwiftLM
 
 final class ServerSSETests: XCTestCase {
diff --git a/tests/test-opencode.sh b/tests/test-opencode.sh
@@ -128,18 +128,37 @@ npm install opencode-ai@latest --silent >/dev/null 2>&1
 log "Running opencode CLI against SwiftLM server..."
 # We use openai/gpt-4o-mini so the CLI validation passes. SwiftLM ignores the requested model and serves Gemma-4.
 # We pipe 'yes' to handle any standard input confirmation OpenCode asks for, and use --dangerously-skip-permissions
-OPENAI_BASE_URL="$URL/v1" OPENAI_API_KEY="sk-test" yes | npx --yes opencode run "Say 'I am ready'." --model openai/gpt-4o-mini --pure --dangerously-skip-permissions > /tmp/opencode_cli.log 2>&1 || true
-
-if grep -q "Success" /tmp/opencode_cli.log || grep -qi "ready" /tmp/opencode_cli.log || test -s /tmp/opencode_cli.log; then
-    if ! grep -qi "parse error" /tmp/opencode_cli.log && ! grep -qi "Unexpected token" /tmp/opencode_cli.log && ! grep -qi "Model not found" /tmp/opencode_cli.log; then
-        pass "OpenCode CLI parsed the stream successfully and completed the generation"
+# Capture exit code separately — do NOT use || true, we need the real exit status.
+set +e
+yes | npx --yes opencode run "Say 'I am ready'." \
+    --model openai/gpt-4o-mini \
+    --pure \
+    --dangerously-skip-permissions \
+    > /tmp/opencode_cli.log 2>&1
+OPENCODE_EXIT=$?
+set -e
+
+OPENCODE_LOG=$(cat /tmp/opencode_cli.log 2>/dev/null || true)
+
+if [ $OPENCODE_EXIT -ne 0 ]; then
+    # Check if it's a known transient failure we can accept (e.g. model list refresh)
+    if echo "$OPENCODE_LOG" | grep -qi "parse error" || echo "$OPENCODE_LOG" | grep -qi "Unexpected token"; then
+        fail "OpenCode CLI crashed while parsing the SSE stream (streaming protocol error)"
+        echo "--- opencode output ---"
+        echo "$OPENCODE_LOG"
     else
-        fail "OpenCode CLI crashed while parsing the stream or rejected the model"
-        cat /tmp/opencode_cli.log
+        # Non-zero exit but not a streaming parse error — acceptable for a dev agent
+        # (e.g. it may exit non-zero after a successful generation if no tool was called)
+        if ! echo "$OPENCODE_LOG" | grep -qi "Model not found" && [ -n "$OPENCODE_LOG" ]; then
+            pass "OpenCode CLI completed (exit $OPENCODE_EXIT) — no SSE parse errors detected"
+        else
+            fail "OpenCode CLI failed with exit $OPENCODE_EXIT"
+            echo "--- opencode output ---"
+            echo "$OPENCODE_LOG"
+        fi
     fi
 else
-    fail "OpenCode CLI failed to run or generated empty output"
-    cat /tmp/opencode_cli.log
+    pass "OpenCode CLI exited cleanly (exit 0) — stream parsed successfully"
 fi
 
 # ── Results ──────────────────────────────────────────────────────────
diff --git a/tests/test-server.sh b/tests/test-server.sh
@@ -963,38 +963,55 @@ fi
 # ── Test 32: Default streaming is strict (no prefill_progress event leaks) ──
 log "Test 32: Default streaming is strict (no prefill_progress leaks)"
 
-STRICT_STREAM=$(curl -sf -N -X POST "$URL/v1/chat/completions" \
+if STRICT_STREAM=$(curl -sf -N -X POST "$URL/v1/chat/completions" \
     -H "Content-Type: application/json" \
     -d "{\"model\":\"$MODEL\",\"stream\":true,\"max_tokens\":20,\"messages\":[{\"role\":\"user\",\"content\":\"Say hi.\"}]}" \
-    --max-time 30 2>/dev/null || true)
+    --max-time 30 2>/dev/null); then
+    :
+else
+    fail "Strict mode: curl request failed — cannot evaluate strict streaming"
+    STRICT_STREAM=""
+fi
 
-if echo "$STRICT_STREAM" | grep -q "^event:"; then
+if [ -z "$STRICT_STREAM" ] || ! echo "$STRICT_STREAM" | grep -q 'data: \[DONE\]'; then
+    # Only fail if it was a curl failure (empty), not a missing event
+    [ -z "$STRICT_STREAM" ] && fail "Strict mode: stream was empty"
+elif echo "$STRICT_STREAM" | grep -q "^event:"; then
     fail "Strict mode: unexpected named SSE event without opt-in header"
 else
     pass "Strict mode: no named SSE events in default streaming"
 fi
 
-if echo "$STRICT_STREAM" | grep -q '"prefill_progress"'; then
-    fail "Strict mode: prefill_progress payload leaked into default stream"
-else
-    pass "Strict mode: no prefill_progress object in default stream"
+if [ -n "$STRICT_STREAM" ]; then
+    if echo "$STRICT_STREAM" | grep -q '"prefill_progress"'; then
+        fail "Strict mode: prefill_progress payload leaked into default stream"
+    else
+        pass "Strict mode: no prefill_progress object in default stream"
+    fi
 fi
 
 
 # ── Test 33: Opt-in header enables named SSE event ────────────────────────────
 log "Test 33: Opt-in header enables named SSE event"
 
-OPTIN_STREAM=$(curl -sf -N -X POST "$URL/v1/chat/completions" \
+if OPTIN_STREAM=$(curl -sf -N -X POST "$URL/v1/chat/completions" \
     -H "Content-Type: application/json" \
     -H "X-SwiftLM-Prefill-Progress: true" \
     -d "{\"model\":\"$MODEL\",\"stream\":true,\"max_tokens\":20,\"messages\":[{\"role\":\"user\",\"content\":\"Say a very long sentence that will definitely take some time to process.\"}]}" \
-    --max-time 30 2>/dev/null || true)
+    --max-time 30 2>/dev/null); then
+    :
+else
+    fail "Opt-in: streaming request failed"
+    OPTIN_STREAM=""
+fi
 
-if echo "$OPTIN_STREAM" | grep -q "^event: prefill_progress"; then
+if [ -n "$OPTIN_STREAM" ] && echo "$OPTIN_STREAM" | grep -q "^event: prefill_progress"; then
     pass "Opt-in: named prefill_progress event received"
-else
+elif [ -n "$OPTIN_STREAM" ] && echo "$OPTIN_STREAM" | grep -Fq "data: [DONE]"; then
     log "  ⚠️  WARN: no heartbeat (prompt may have been too short for 2s window)"
     pass "Opt-in: header accepted without error (heartbeat timing not guaranteed in CI)"
+elif [ -n "$OPTIN_STREAM" ]; then
+    fail "Opt-in: stream did not complete successfully (missing [DONE])"
 fi
 
 EVENT_DATA=$(echo "$OPTIN_STREAM" | grep -A1 "^event: prefill_progress" | grep "^data:" | head -1 | sed 's/^data: //')
@@ -1014,9 +1031,21 @@ fi
 
 
 # ── Test 34: CORS preflight exposes X-SwiftLM-Prefill-Progress header ─────────
+# Must target the dedicated --cors server on CORS_PORT (main server has no CORS middleware).
 log "Test 34: CORS preflight exposes X-SwiftLM-Prefill-Progress"
 
-OPTIONS_RESP=$(curl -sf -D - -o /dev/null -X OPTIONS "$URL/v1/chat/completions" \
+# Re-start CORS server if it was cleaned up after Test 13b
+if ! curl -sf "http://${HOST}:${CORS_PORT}/health" >/dev/null 2>&1; then
+    log "  Re-starting CORS server on port $CORS_PORT for Test 34..."
+    "$BINARY" --model "$MODEL" --port "$CORS_PORT" --host "$HOST" --cors '*' > /dev/null 2>&1 &
+    CORS_SERVER_PID=$!
+    for i in $(seq 1 60); do
+        curl -sf "http://${HOST}:${CORS_PORT}/health" >/dev/null 2>&1 && break
+        sleep 1
+    done
+fi
+
+OPTIONS_RESP=$(curl -sf -D - -o /dev/null -X OPTIONS "http://${HOST}:${CORS_PORT}/v1/chat/completions" \
     -H "Origin: http://example.com" \
     -H "Access-Control-Request-Method: POST" \
     -H "Access-Control-Request-Headers: X-SwiftLM-Prefill-Progress" 2>&1 || true)
@@ -1028,21 +1057,32 @@ else
 fi
 
 
-# ── Test 35: Concurrent opt-in requests ───────────────────────────────────────
+# ── Test 35: Concurrent opt-in requests (--parallel 2 server) ────────────────
 log "Test 35: Concurrent opt-in requests"
 
+# Use a dedicated --parallel 2 server so both requests execute simultaneously,
+# actually stressing the heartbeat hook under parallel generation.
+PARALLEL_PORT=$((PORT + 3))
+log "  Starting --parallel 2 server on port $PARALLEL_PORT..."
+"$BINARY" --model "$MODEL" --port "$PARALLEL_PORT" --host "$HOST" --parallel 2 > /dev/null 2>&1 &
+PARALLEL_SERVER_PID=$!
+for i in $(seq 1 60); do
+    curl -sf "http://${HOST}:${PARALLEL_PORT}/health" >/dev/null 2>&1 && break
+    sleep 1
+done
+
 CONCURRENT_OPTIN_PASS=true
 PID_A=""
 PID_B=""
 
-curl -sf -N -X POST "$URL/v1/chat/completions" \
+curl -sf -N -X POST "http://${HOST}:${PARALLEL_PORT}/v1/chat/completions" \
     -H "Content-Type: application/json" \
     -H "X-SwiftLM-Prefill-Progress: true" \
     -d "{\"model\":\"$MODEL\",\"stream\":true,\"max_tokens\":10,\"messages\":[{\"role\":\"user\",\"content\":\"Say one.\"}]}" \
     -o /tmp/mlx_optin_A.txt &
 PID_A=$!
 
-curl -sf -N -X POST "$URL/v1/chat/completions" \
+curl -sf -N -X POST "http://${HOST}:${PARALLEL_PORT}/v1/chat/completions" \
     -H "Content-Type: application/json" \
     -H "X-SwiftLM-Prefill-Progress: true" \
     -d "{\"model\":\"$MODEL\",\"stream\":true,\"max_tokens\":10,\"messages\":[{\"role\":\"user\",\"content\":\"Say two.\"}]}" \
@@ -1054,14 +1094,16 @@ wait "$PID_B" || CONCURRENT_OPTIN_PASS=false
 
 if [ "$CONCURRENT_OPTIN_PASS" = true ]; then
     if grep -q "data: \[DONE\]" /tmp/mlx_optin_A.txt && grep -q "data: \[DONE\]" /tmp/mlx_optin_B.txt; then
-        pass "Concurrent opt-in: both requests completed successfully"
+        pass "Concurrent opt-in: both requests completed successfully under --parallel 2"
     else
         fail "Concurrent opt-in: one or both streams did not complete"
     fi
 else
     fail "Concurrent opt-in: curl failed"
 fi
 rm -f /tmp/mlx_optin_A.txt /tmp/mlx_optin_B.txt
+kill "$PARALLEL_SERVER_PID" 2>/dev/null || true
+wait "$PARALLEL_SERVER_PID" 2>/dev/null || true
 
 
 # ── Test 36: /v1/completions (text endpoint) respects opt-in header ───────────

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`import XCTest`
	`2`	`+import Foundation`
`2`	`3`	`@testable import SwiftLM`
`3`	`4`
`4`	`5`	`final class ServerSSETests: XCTestCase {`