Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 93 additions & 6 deletions .github/workflows/test-gvisor-firewall-comparison.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,22 @@ jobs:
-v /tmp/squid.conf:/etc/squid/squid.conf:ro \
ubuntu/squid:latest

sleep 3
# Wait for Squid to be ready (up to 30 seconds)
echo "Waiting for Squid to be ready..."
for i in {1..30}; do
if docker run --rm --network awf-test curlimages/curl:latest \
curl -s -o /dev/null -w "%{http_code}" \
-x http://172.30.0.10:3128 http://example.com | grep -q "200"; then
echo "✅ Squid is ready"
break
fi
if [ $i -eq 30 ]; then
echo "❌ Squid failed to start within 30 seconds"
docker logs squid-test
exit 1
fi
sleep 1
done

# Start agent with iptables DNAT
docker run --rm --name agent-test \
Expand Down Expand Up @@ -182,7 +197,22 @@ jobs:
-v /tmp/squid.conf:/etc/squid/squid.conf:ro \
ubuntu/squid:latest

sleep 3
# Wait for Squid to be ready (up to 30 seconds)
echo "Waiting for Squid to be ready..."
for i in {1..30}; do
if docker run --rm --network awf-test-gvisor curlimages/curl:latest \
curl -s -o /dev/null -w "%{http_code}" \
-x http://172.30.0.10:3128 http://example.com | grep -q "200"; then
echo "✅ Squid is ready"
break
fi
if [ $i -eq 30 ]; then
echo "❌ Squid failed to start within 30 seconds"
docker logs squid-gvisor
exit 1
fi
sleep 1
done

# Agent on gVisor with iptables
docker run --rm --name agent-gvisor \
Expand Down Expand Up @@ -318,7 +348,21 @@ jobs:
envoyproxy/envoy:v1.28-latest \
-c /etc/envoy/envoy.yaml

sleep 3
# Wait for Envoy to be ready (check admin port up to 30 seconds)
echo "Waiting for Envoy to be ready..."
for i in {1..30}; do
if docker run --rm --network envoy-test curlimages/curl:latest \
curl -s -o /dev/null -w "%{http_code}" http://172.30.0.10:9901/ready | grep -q "200"; then
echo "✅ Envoy is ready"
break
fi
if [ $i -eq 30 ]; then
echo "❌ Envoy failed to start within 30 seconds"
docker logs envoy-test
exit 1
fi
sleep 1
done

# Agent with iptables redirect to Envoy
docker run --rm --name agent-envoy \
Expand Down Expand Up @@ -443,7 +487,21 @@ jobs:
-v /tmp/envoy.yaml:/etc/envoy/envoy.yaml:ro \
envoyproxy/envoy:v1.28-latest -c /etc/envoy/envoy.yaml

sleep 3
# Wait for Envoy to be ready (check admin port up to 30 seconds)
echo "Waiting for Envoy to be ready..."
for i in {1..30}; do
if docker run --rm --network envoy-gvisor curlimages/curl:latest \
curl -s -o /dev/null -w "%{http_code}" http://172.30.0.10:9901/ready | grep -q "200"; then
echo "✅ Envoy is ready"
break
fi
if [ $i -eq 30 ]; then
echo "❌ Envoy failed to start within 30 seconds"
docker logs envoy-gvisor
exit 1
fi
sleep 1
done

# Agent on gVisor
docker run --rm --runtime=runsc \
Expand Down Expand Up @@ -507,7 +565,22 @@ jobs:
-v /tmp/squid.conf:/etc/squid/squid.conf:ro \
ubuntu/squid:latest

sleep 3
# Wait for Squid to be ready (up to 30 seconds)
echo "Waiting for Squid to be ready..."
for i in {1..30}; do
if docker run --rm --network squid-perf curlimages/curl:latest \
curl -s -o /dev/null -w "%{http_code}" \
-x http://squid-perf:3128 http://example.com | grep -q "200"; then
echo "✅ Squid is ready"
break
fi
if [ $i -eq 30 ]; then
echo "❌ Squid failed to start within 30 seconds"
docker logs squid-perf
exit 1
fi
sleep 1
done

# Run 100 requests and measure latency
AVG=$(docker run --rm --network squid-perf \
Expand Down Expand Up @@ -588,7 +661,21 @@ jobs:
-v /tmp/envoy.yaml:/etc/envoy/envoy.yaml:ro \
envoyproxy/envoy:v1.28-latest -c /etc/envoy/envoy.yaml

sleep 3
# Wait for Envoy to be ready (check admin port up to 30 seconds)
echo "Waiting for Envoy to be ready..."
for i in {1..30}; do
if docker run --rm --network envoy-perf curlimages/curl:latest \
curl -s -o /dev/null -w "%{http_code}" http://envoy-perf:9901/ready | grep -q "200"; then
echo "✅ Envoy is ready"
break
fi
if [ $i -eq 30 ]; then
echo "❌ Envoy failed to start within 30 seconds"
docker logs envoy-perf
exit 1
fi
sleep 1
done

# Run 100 requests and measure latency
AVG=$(docker run --rm --network envoy-perf \
Expand Down
30 changes: 30 additions & 0 deletions containers/api-proxy/token-parsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,35 @@ function isStreamingResponse(headers) {
return ct.includes('text/event-stream');
}

/**
* Heuristically determine whether a request path targets an LLM
* completion-style endpoint that is expected to report token usage.
*
* Used to decide whether a successful (2xx) response that yielded no
* extractable usage should still produce a token-usage record (marked
* `usage_missing`) rather than being silently dropped. Non-completion
* traffic (e.g. `/models`, health checks) is excluded to avoid noise.
*
* Matches: OpenAI/Copilot chat + responses + legacy completions, Anthropic
* messages, and Gemini generateContent/streamGenerateContent — across
* optional version prefixes and query strings.
*
* @param {string} path - Request path (may include a query string)
* @returns {boolean}
*/
function looksLikeCompletionRequest(path) {
if (typeof path !== 'string' || path.length === 0) return false;
const pathOnly = path.split('?')[0].toLowerCase();
return (
pathOnly.includes('/chat/completions') ||
pathOnly.includes('/responses') ||
pathOnly.includes('/messages') ||
pathOnly.includes('/completions') ||
pathOnly.includes(':generatecontent') ||
pathOnly.includes(':streamgeneratecontent')
);
}

/**
* Check if a response is gzip or deflate compressed.
*/
Expand Down Expand Up @@ -310,6 +339,7 @@ function normalizeUsage(usage) {

module.exports = {
isStreamingResponse,
looksLikeCompletionRequest,
isCompressedResponse,
createDecompressor,
extractReasoningTokens,
Expand Down
71 changes: 71 additions & 0 deletions containers/api-proxy/token-tracker-http.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
const { logRequest } = require('./logging');
const {
isStreamingResponse,
looksLikeCompletionRequest,
isCompressedResponse,
createDecompressor,
parseSseDataLines,
Expand Down Expand Up @@ -268,6 +269,59 @@ function buildAndWriteTokenRecord(normalized, { requestId, provider, model, reqP
});
}

/**
* Persist a placeholder token-usage record for a successful completion-style
* response from which no usage could be extracted.
*
* Without this, a 2xx LLM response whose body omits a usage payload (observed
* with some Copilot streaming responses) produces NO line in token-usage.jsonl
* at all — the request becomes invisible to downstream consumers (step
* summaries, OTEL fan-out, AI-credit aggregation). Writing a zeroed record
* flagged with `usage_missing: true` keeps the request observable and makes the
* extraction gap diagnosable, while clearly signalling that the token counts
* are not real measured values.
*
* @param {object} params
* @param {string} params.requestId
* @param {string} params.provider
* @param {string|null} params.model
* @param {string} params.reqPath
* @param {number} params.status
* @param {boolean} params.streaming
* @param {number} params.duration
* @param {number} params.responseBytes
*/
function writeMissingUsageRecord({ requestId, provider, model, reqPath, status, streaming, duration, responseBytes }) {
const zeroUsage = {
input_tokens: 0,
output_tokens: 0,
cache_read_tokens: 0,
cache_write_tokens: 0,
reasoning_tokens: 0,
};
const record = buildTokenUsageRecord(zeroUsage, {
requestId,
provider,
model,
reqPath,
status,
streaming,
duration,
responseBytes,
});
record.usage_missing = true;
writeTokenUsage(record);

logRequest('warn', 'token_usage_missing', {
request_id: requestId,
provider,
model: model || 'unknown',
path: reqPath,
streaming,
status,
});
}

/**
* Finalize token tracking for an HTTP response.
*
Expand Down Expand Up @@ -313,6 +367,23 @@ function finalizeHttpTracking(state, proxyRes, opts) {

const normalized = normalizeUsage(usage);
if (!normalized) {
// No usage payload was extracted from a successful response. For
// completion-style endpoints (where usage IS expected) write a placeholder
// record so the request stays visible downstream and the extraction gap is
// diagnosable, instead of silently dropping it. Non-completion traffic
// (e.g. /models, health checks) is skipped to avoid noise.
if (looksLikeCompletionRequest(reqPath)) {
writeMissingUsageRecord({
requestId,
provider,
model: model || requestModel || provider,
reqPath,
status: proxyRes.statusCode,
streaming,
duration,
responseBytes: state.totalBytes,
});
}
if (typeof onSpanEnd === 'function') onSpanEnd(proxyRes.statusCode);
return;
}
Expand Down
108 changes: 108 additions & 0 deletions containers/api-proxy/token-tracker-http.unit.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,114 @@ describe('finalizeHttpTracking', () => {

expect(() => finalizeHttpTracking(state, makeProxyRes(200), opts)).not.toThrow();
});

// ── missing-usage placeholder records ───────────────────────────────

describe('completion responses with no extractable usage', () => {
let mockStream;
let mkdirSyncSpy;
let createWriteStreamSpy;

function makeMockStream() {
const chunks = [];
const stream = {
writableEnded: false,
write: jest.fn((chunk) => { chunks.push(chunk); return true; }),
end: jest.fn((cb) => { stream.writableEnded = true; if (cb) cb(); }),
on: jest.fn(),
get writtenRecords() {
return chunks.map((c) => JSON.parse(c.trim()));
},
};
return stream;
}

beforeEach(async () => {
await closeLogStream();
mockStream = makeMockStream();
mkdirSyncSpy = jest.spyOn(fs, 'mkdirSync').mockReturnValue(undefined);
createWriteStreamSpy = jest.spyOn(fs, 'createWriteStream').mockReturnValue(mockStream);
});

afterEach(async () => {
mkdirSyncSpy.mockRestore();
createWriteStreamSpy.mockRestore();
await closeLogStream();
});

test('writes a usage_missing record for a completion path', () => {
const opts = makeOpts({ provider: 'copilot', path: '/chat/completions', requestModel: 'gpt-4o' });
const body = JSON.stringify({ id: 'x', choices: [{ delta: {}, finish_reason: 'stop' }] }); // no usage
const state = makeState({
chunks: [Buffer.from(body)],
bufferedBytes: body.length,
totalBytes: body.length,
});

finalizeHttpTracking(state, makeProxyRes(200), opts);

const records = mockStream.writtenRecords;
expect(records).toHaveLength(1);
expect(records[0]).toMatchObject({
event: 'token_usage',
provider: 'copilot',
model: 'gpt-4o',
path: '/chat/completions',
status: 200,
usage_missing: true,
input_tokens: 0,
output_tokens: 0,
});
// No real usage was measured, so metrics must not be incremented.
expect(opts.metrics.increment).not.toHaveBeenCalled();
});

test('writes a usage_missing record for a streaming completion response', () => {
const opts = makeOpts({ provider: 'copilot', path: '/chat/completions', requestModel: 'gpt-4o' });
const state = makeState({
streaming: true,
contentType: 'text/event-stream',
streamingUsage: {},
streamingModel: null,
});

finalizeHttpTracking(state, makeProxyRes(200), opts);

const records = mockStream.writtenRecords;
expect(records).toHaveLength(1);
expect(records[0]).toMatchObject({ usage_missing: true, streaming: true, provider: 'copilot' });
});

test('does NOT write a record for a non-completion path (e.g. /models)', () => {
const opts = makeOpts({ provider: 'copilot', path: '/v1/models' });
const body = JSON.stringify({ data: [] }); // no usage, not a completion endpoint
const state = makeState({
chunks: [Buffer.from(body)],
bufferedBytes: body.length,
totalBytes: body.length,
});

finalizeHttpTracking(state, makeProxyRes(200), opts);

expect(mockStream.writtenRecords).toHaveLength(0);
});

test('still calls onSpanEnd after writing the placeholder record', () => {
const onSpanEnd = jest.fn();
const opts = makeOpts({ provider: 'copilot', path: '/responses', onSpanEnd });
const body = JSON.stringify({ type: 'response.created' }); // no usage
const state = makeState({
chunks: [Buffer.from(body)],
bufferedBytes: body.length,
totalBytes: body.length,
});

finalizeHttpTracking(state, makeProxyRes(200), opts);

expect(onSpanEnd).toHaveBeenCalledWith(200);
expect(mockStream.writtenRecords).toHaveLength(1);
});
});
});

// ── extractUsageFromTrackedState ──────────────────────────────────────
Expand Down
Loading
Loading