github · lpcox · Jun 18, 2026 · Jun 18, 2026
diff --git a/.github/workflows/test-gvisor-firewall-comparison.yml b/.github/workflows/test-gvisor-firewall-comparison.yml
@@ -59,7 +59,22 @@ jobs:
             -v /tmp/squid.conf:/etc/squid/squid.conf:ro \
             ubuntu/squid:latest
 
-          sleep 3
+          # Wait for Squid to be ready (up to 30 seconds)
+          echo "Waiting for Squid to be ready..."
+          for i in {1..30}; do
+            if docker run --rm --network awf-test curlimages/curl:latest \
+              curl -s -o /dev/null -w "%{http_code}" \
+              -x http://172.30.0.10:3128 http://example.com | grep -q "200"; then
+              echo "✅ Squid is ready"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Squid failed to start within 30 seconds"
+              docker logs squid-test
+              exit 1
+            fi
+            sleep 1
+          done
 
           # Start agent with iptables DNAT  
           docker run --rm --name agent-test \
@@ -182,7 +197,22 @@ jobs:
             -v /tmp/squid.conf:/etc/squid/squid.conf:ro \
             ubuntu/squid:latest
 
-          sleep 3
+          # Wait for Squid to be ready (up to 30 seconds)
+          echo "Waiting for Squid to be ready..."
+          for i in {1..30}; do
+            if docker run --rm --network awf-test-gvisor curlimages/curl:latest \
+              curl -s -o /dev/null -w "%{http_code}" \
+              -x http://172.30.0.10:3128 http://example.com | grep -q "200"; then
+              echo "✅ Squid is ready"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Squid failed to start within 30 seconds"
+              docker logs squid-gvisor
+              exit 1
+            fi
+            sleep 1
+          done
 
           # Agent on gVisor with iptables
           docker run --rm --name agent-gvisor \
@@ -318,7 +348,21 @@ jobs:
             envoyproxy/envoy:v1.28-latest \
             -c /etc/envoy/envoy.yaml
 
-          sleep 3
+          # Wait for Envoy to be ready (check admin port up to 30 seconds)
+          echo "Waiting for Envoy to be ready..."
+          for i in {1..30}; do
+            if docker run --rm --network envoy-test curlimages/curl:latest \
+              curl -s -o /dev/null -w "%{http_code}" http://172.30.0.10:9901/ready | grep -q "200"; then
+              echo "✅ Envoy is ready"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Envoy failed to start within 30 seconds"
+              docker logs envoy-test
+              exit 1
+            fi
+            sleep 1
+          done
 
           # Agent with iptables redirect to Envoy
           docker run --rm --name agent-envoy \
@@ -443,7 +487,21 @@ jobs:
             -v /tmp/envoy.yaml:/etc/envoy/envoy.yaml:ro \
             envoyproxy/envoy:v1.28-latest -c /etc/envoy/envoy.yaml
 
-          sleep 3
+          # Wait for Envoy to be ready (check admin port up to 30 seconds)
+          echo "Waiting for Envoy to be ready..."
+          for i in {1..30}; do
+            if docker run --rm --network envoy-gvisor curlimages/curl:latest \
+              curl -s -o /dev/null -w "%{http_code}" http://172.30.0.10:9901/ready | grep -q "200"; then
+              echo "✅ Envoy is ready"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Envoy failed to start within 30 seconds"
+              docker logs envoy-gvisor
+              exit 1
+            fi
+            sleep 1
+          done
 
           # Agent on gVisor
           docker run --rm --runtime=runsc \
@@ -507,7 +565,22 @@ jobs:
             -v /tmp/squid.conf:/etc/squid/squid.conf:ro \
             ubuntu/squid:latest
 
-          sleep 3
+          # Wait for Squid to be ready (up to 30 seconds)
+          echo "Waiting for Squid to be ready..."
+          for i in {1..30}; do
+            if docker run --rm --network squid-perf curlimages/curl:latest \
+              curl -s -o /dev/null -w "%{http_code}" \
+              -x http://squid-perf:3128 http://example.com | grep -q "200"; then
+              echo "✅ Squid is ready"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Squid failed to start within 30 seconds"
+              docker logs squid-perf
+              exit 1
+            fi
+            sleep 1
+          done
 
           # Run 100 requests and measure latency
           AVG=$(docker run --rm --network squid-perf \
@@ -588,7 +661,21 @@ jobs:
             -v /tmp/envoy.yaml:/etc/envoy/envoy.yaml:ro \
             envoyproxy/envoy:v1.28-latest -c /etc/envoy/envoy.yaml
 
-          sleep 3
+          # Wait for Envoy to be ready (check admin port up to 30 seconds)
+          echo "Waiting for Envoy to be ready..."
+          for i in {1..30}; do
+            if docker run --rm --network envoy-perf curlimages/curl:latest \
+              curl -s -o /dev/null -w "%{http_code}" http://envoy-perf:9901/ready | grep -q "200"; then
+              echo "✅ Envoy is ready"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Envoy failed to start within 30 seconds"
+              docker logs envoy-perf
+              exit 1
+            fi
+            sleep 1
+          done
 
           # Run 100 requests and measure latency
           AVG=$(docker run --rm --network envoy-perf \

diff --git a/containers/api-proxy/token-parsers.js b/containers/api-proxy/token-parsers.js
@@ -18,6 +18,35 @@ function isStreamingResponse(headers) {
   return ct.includes('text/event-stream');
 }
 
+/**
+ * Heuristically determine whether a request path targets an LLM
+ * completion-style endpoint that is expected to report token usage.
+ *
+ * Used to decide whether a successful (2xx) response that yielded no
+ * extractable usage should still produce a token-usage record (marked
+ * `usage_missing`) rather than being silently dropped. Non-completion
+ * traffic (e.g. `/models`, health checks) is excluded to avoid noise.
+ *
+ * Matches: OpenAI/Copilot chat + responses + legacy completions, Anthropic
+ * messages, and Gemini generateContent/streamGenerateContent — across
+ * optional version prefixes and query strings.
+ *
+ * @param {string} path - Request path (may include a query string)
+ * @returns {boolean}
+ */
+function looksLikeCompletionRequest(path) {
+  if (typeof path !== 'string' || path.length === 0) return false;
+  const pathOnly = path.split('?')[0].toLowerCase();
+  return (
+    pathOnly.includes('/chat/completions') ||
+    pathOnly.includes('/responses') ||
+    pathOnly.includes('/messages') ||
+    pathOnly.includes('/completions') ||
+    pathOnly.includes(':generatecontent') ||
+    pathOnly.includes(':streamgeneratecontent')
+  );
+}
+
 /**
  * Check if a response is gzip or deflate compressed.
  */
@@ -310,6 +339,7 @@ function normalizeUsage(usage) {
 
 module.exports = {
   isStreamingResponse,
+  looksLikeCompletionRequest,
   isCompressedResponse,
   createDecompressor,
   extractReasoningTokens,

diff --git a/containers/api-proxy/token-tracker-http.js b/containers/api-proxy/token-tracker-http.js
@@ -21,6 +21,7 @@
 const { logRequest } = require('./logging');
 const {
   isStreamingResponse,
+  looksLikeCompletionRequest,
   isCompressedResponse,
   createDecompressor,
   parseSseDataLines,
@@ -268,6 +269,59 @@ function buildAndWriteTokenRecord(normalized, { requestId, provider, model, reqP
   });
 }
 
+/**
+ * Persist a placeholder token-usage record for a successful completion-style
+ * response from which no usage could be extracted.
+ *
+ * Without this, a 2xx LLM response whose body omits a usage payload (observed
+ * with some Copilot streaming responses) produces NO line in token-usage.jsonl
+ * at all — the request becomes invisible to downstream consumers (step
+ * summaries, OTEL fan-out, AI-credit aggregation). Writing a zeroed record
+ * flagged with `usage_missing: true` keeps the request observable and makes the
+ * extraction gap diagnosable, while clearly signalling that the token counts
+ * are not real measured values.
+ *
+ * @param {object} params
+ * @param {string} params.requestId
+ * @param {string} params.provider
+ * @param {string|null} params.model
+ * @param {string} params.reqPath
+ * @param {number} params.status
+ * @param {boolean} params.streaming
+ * @param {number} params.duration
+ * @param {number} params.responseBytes
+ */
+function writeMissingUsageRecord({ requestId, provider, model, reqPath, status, streaming, duration, responseBytes }) {
+  const zeroUsage = {
+    input_tokens: 0,
+    output_tokens: 0,
+    cache_read_tokens: 0,
+    cache_write_tokens: 0,
+    reasoning_tokens: 0,
+  };
+  const record = buildTokenUsageRecord(zeroUsage, {
+    requestId,
+    provider,
+    model,
+    reqPath,
+    status,
+    streaming,
+    duration,
+    responseBytes,
+  });
+  record.usage_missing = true;
+  writeTokenUsage(record);
+
+  logRequest('warn', 'token_usage_missing', {
+    request_id: requestId,
+    provider,
+    model: model || 'unknown',
+    path: reqPath,
+    streaming,
+    status,
+  });
+}
+
 /**
  * Finalize token tracking for an HTTP response.
  *
@@ -313,6 +367,23 @@ function finalizeHttpTracking(state, proxyRes, opts) {
 
   const normalized = normalizeUsage(usage);
   if (!normalized) {
+    // No usage payload was extracted from a successful response. For
+    // completion-style endpoints (where usage IS expected) write a placeholder
+    // record so the request stays visible downstream and the extraction gap is
+    // diagnosable, instead of silently dropping it. Non-completion traffic
+    // (e.g. /models, health checks) is skipped to avoid noise.
+    if (looksLikeCompletionRequest(reqPath)) {
+      writeMissingUsageRecord({
+        requestId,
+        provider,
+        model: model || requestModel || provider,
+        reqPath,
+        status: proxyRes.statusCode,
+        streaming,
+        duration,
+        responseBytes: state.totalBytes,
+      });
+    }
     if (typeof onSpanEnd === 'function') onSpanEnd(proxyRes.statusCode);
     return;
   }

diff --git a/containers/api-proxy/token-tracker-http.unit.test.js b/containers/api-proxy/token-tracker-http.unit.test.js
@@ -349,6 +349,114 @@ describe('finalizeHttpTracking', () => {
 
     expect(() => finalizeHttpTracking(state, makeProxyRes(200), opts)).not.toThrow();
   });
+
+  // ── missing-usage placeholder records ───────────────────────────────
+
+  describe('completion responses with no extractable usage', () => {
+    let mockStream;
+    let mkdirSyncSpy;
+    let createWriteStreamSpy;
+
+    function makeMockStream() {
+      const chunks = [];
+      const stream = {
+        writableEnded: false,
+        write: jest.fn((chunk) => { chunks.push(chunk); return true; }),
+        end: jest.fn((cb) => { stream.writableEnded = true; if (cb) cb(); }),
+        on: jest.fn(),
+        get writtenRecords() {
+          return chunks.map((c) => JSON.parse(c.trim()));
+        },
+      };
+      return stream;
+    }
+
+    beforeEach(async () => {
+      await closeLogStream();
+      mockStream = makeMockStream();
+      mkdirSyncSpy = jest.spyOn(fs, 'mkdirSync').mockReturnValue(undefined);
+      createWriteStreamSpy = jest.spyOn(fs, 'createWriteStream').mockReturnValue(mockStream);
+    });
+
+    afterEach(async () => {
+      mkdirSyncSpy.mockRestore();
+      createWriteStreamSpy.mockRestore();
+      await closeLogStream();
+    });
+
+    test('writes a usage_missing record for a completion path', () => {
+      const opts = makeOpts({ provider: 'copilot', path: '/chat/completions', requestModel: 'gpt-4o' });
+      const body = JSON.stringify({ id: 'x', choices: [{ delta: {}, finish_reason: 'stop' }] }); // no usage
+      const state = makeState({
+        chunks: [Buffer.from(body)],
+        bufferedBytes: body.length,
+        totalBytes: body.length,
+      });
+
+      finalizeHttpTracking(state, makeProxyRes(200), opts);
+
+      const records = mockStream.writtenRecords;
+      expect(records).toHaveLength(1);
+      expect(records[0]).toMatchObject({
+        event: 'token_usage',
+        provider: 'copilot',
+        model: 'gpt-4o',
+        path: '/chat/completions',
+        status: 200,
+        usage_missing: true,
+        input_tokens: 0,
+        output_tokens: 0,
+      });
+      // No real usage was measured, so metrics must not be incremented.
+      expect(opts.metrics.increment).not.toHaveBeenCalled();
+    });
+
+    test('writes a usage_missing record for a streaming completion response', () => {
+      const opts = makeOpts({ provider: 'copilot', path: '/chat/completions', requestModel: 'gpt-4o' });
+      const state = makeState({
+        streaming: true,
+        contentType: 'text/event-stream',
+        streamingUsage: {},
+        streamingModel: null,
+      });
+
+      finalizeHttpTracking(state, makeProxyRes(200), opts);
+
+      const records = mockStream.writtenRecords;
+      expect(records).toHaveLength(1);
+      expect(records[0]).toMatchObject({ usage_missing: true, streaming: true, provider: 'copilot' });
+    });
+
+    test('does NOT write a record for a non-completion path (e.g. /models)', () => {
+      const opts = makeOpts({ provider: 'copilot', path: '/v1/models' });
+      const body = JSON.stringify({ data: [] }); // no usage, not a completion endpoint
+      const state = makeState({
+        chunks: [Buffer.from(body)],
+        bufferedBytes: body.length,
+        totalBytes: body.length,
+      });
+
+      finalizeHttpTracking(state, makeProxyRes(200), opts);
+
+      expect(mockStream.writtenRecords).toHaveLength(0);
+    });
+
+    test('still calls onSpanEnd after writing the placeholder record', () => {
+      const onSpanEnd = jest.fn();
+      const opts = makeOpts({ provider: 'copilot', path: '/responses', onSpanEnd });
+      const body = JSON.stringify({ type: 'response.created' }); // no usage
+      const state = makeState({
+        chunks: [Buffer.from(body)],
+        bufferedBytes: body.length,
+        totalBytes: body.length,
+      });
+
+      finalizeHttpTracking(state, makeProxyRes(200), opts);
+
+      expect(onSpanEnd).toHaveBeenCalledWith(200);
+      expect(mockStream.writtenRecords).toHaveLength(1);
+    });
+  });
 });
 
 // ── extractUsageFromTrackedState ──────────────────────────────────────