Skip to content

Commit ed2594b

Browse files
Saga4claude
andcommitted
fix: resolve Vitest benchmarking showing wall-clock time instead of per-function timing
Root cause: Vitest performance tests reported "20.0 seconds over 1 loop" (JUnit XML wall-clock fallback) instead of actual per-function nanosecond timing. This was a chain of two issues: 1. **stdout interception**: Vitest's default `threads` pool intercepts process.stdout.write() and console.log(), preventing timing markers from flowing to the parent process. Fixed by adding `--pool=forks` to all Vitest commands and config files. The `forks` pool uses child processes where stdout flows directly to the parent. 2. **test name detection**: Even after markers flowed through (43,000+ found in stdout), the parser couldn't match them to JUnit XML testcases because all markers had "unknown" as the test name. This happened because Vitest doesn't inject `beforeEach` as a global (unlike Jest), so capture.js's Jest-style hook to set `currentTestName` never fired. Fixed by adding Vitest-specific test name detection in capture.js: - Primary: `expect.getState().currentTestName` (full describe path) - Fallback: `__vitest_worker__.current.fullTestName` - Defense-in-depth: parser fallback matches "unknown" markers to the first testcase when no name match is found Result: cheerio's `isHtml` went from "20.0s / 1 loop" to "902μs / 20,853 loops" with proper speedup analysis. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a78b4cc commit ed2594b

5 files changed

Lines changed: 207 additions & 36 deletions

File tree

codeflash/languages/javascript/parse.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -172,22 +172,12 @@ def parse_jest_test_xml(
172172
if global_stdout:
173173
marker_count = len(jest_start_pattern.findall(global_stdout))
174174
if marker_count > 0:
175-
logger.debug(f"Found {marker_count} timing start markers in Jest stdout")
176-
else:
177-
logger.debug(f"No timing start markers found in Jest stdout (len={len(global_stdout)})")
178-
# Check for END markers with duration (perf test markers)
175+
logger.debug(f"Found {marker_count} timing start markers in subprocess stdout")
179176
end_marker_count = len(jest_end_pattern.findall(global_stdout))
180177
if end_marker_count > 0:
181-
logger.debug(
182-
f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout"
183-
)
184-
# Sample a few markers to verify loop indices
185-
end_samples = list(jest_end_pattern.finditer(global_stdout))[:5]
186-
for sample in end_samples:
187-
groups = sample.groups()
188-
logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}")
178+
logger.debug(f"Found {end_marker_count} END timing markers with duration in subprocess stdout")
189179
else:
190-
logger.debug("[PERF-DEBUG] No END markers with duration found in Jest stdout")
180+
logger.debug(f"No END markers found in subprocess stdout (len={len(global_stdout)})")
191181
except (AttributeError, UnicodeDecodeError):
192182
global_stdout = ""
193183

@@ -215,6 +205,10 @@ def parse_jest_test_xml(
215205
# Key: (testName, testName2, funcName, loopIndex, lineId)
216206
key = match.groups()[:5]
217207
end_matches_dict[key] = match
208+
logger.debug(
209+
f"Suite {suite_count}: combined_stdout len={len(combined_stdout)}, "
210+
f"start_matches={len(start_matches)}, end_matches={len(end_matches_dict)}"
211+
)
218212

219213
# Debug: log suite-level END marker parsing for perf tests
220214
if end_matches_dict:
@@ -371,6 +365,25 @@ def parse_jest_test_xml(
371365
# end_key is (module, testName, funcName, loopIndex, invocationId)
372366
if len(end_key) >= 2 and sanitized_test_name in end_key[1]:
373367
matching_ends_direct.append(end_match)
368+
369+
# Fallback: If no matches found but END markers exist with "unknown" test name
370+
# (happens in Vitest where beforeEach hook doesn't fire to set currentTestName),
371+
# match ALL "unknown" markers to this testcase. Use a consumed set to avoid
372+
# assigning the same marker to multiple testcases.
373+
if not matching_ends_direct and end_matches_dict:
374+
unknown_markers = [(k, m) for k, m in end_matches_dict.items() if len(k) >= 2 and k[1] == "unknown"]
375+
if unknown_markers:
376+
# Assign all unconsumed unknown markers to this testcase
377+
for _, end_match in unknown_markers:
378+
matching_ends_direct.append(end_match)
379+
# Remove consumed markers so they aren't double-assigned to other testcases
380+
for end_key, _ in unknown_markers:
381+
end_matches_dict.pop(end_key, None)
382+
logger.debug(
383+
f"[PERF-UNKNOWN-MATCH] Testcase '{test_name[:40]}': matched {len(matching_ends_direct)} "
384+
f"'unknown' END markers (Vitest fallback)"
385+
)
386+
374387
# Debug: log matching results for perf tests
375388
if matching_ends_direct:
376389
loop_indices = [int(m.groups()[3]) if m.groups()[3].isdigit() else 1 for m in matching_ends_direct]

codeflash/languages/javascript/vitest_runner.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ def _ensure_codeflash_vitest_config(project_root: Path) -> Path | None:
225225
test: {{
226226
// Override include pattern to match all test files including generated ones
227227
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
228+
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
229+
pool: 'forks',
228230
}},
229231
}});
230232
"""
@@ -239,6 +241,8 @@ def _ensure_codeflash_vitest_config(project_root: Path) -> Path | None:
239241
include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
240242
// Exclude common non-test directories
241243
exclude: ['**/node_modules/**', '**/dist/**'],
244+
// Use forks pool so timing markers from process.stdout.write flow to parent stdout
245+
pool: 'forks',
242246
},
243247
});
244248
"""
@@ -277,6 +281,7 @@ def _build_vitest_behavioral_command(
277281
"--reporter=default",
278282
"--reporter=junit",
279283
"--no-file-parallelism", # Serial execution for deterministic timing
284+
"--pool=forks", # Use child processes so timing markers flow to parent stdout
280285
]
281286

282287
# For monorepos with restrictive vitest configs (e.g., include: test/**/*.test.ts),
@@ -326,6 +331,7 @@ def _build_vitest_benchmarking_command(
326331
"--reporter=default",
327332
"--reporter=junit",
328333
"--no-file-parallelism", # Serial execution for consistent benchmarking
334+
"--pool=forks", # Use child processes so timing markers flow to parent stdout
329335
]
330336

331337
# Use codeflash vitest config to override restrictive include patterns
@@ -656,11 +662,6 @@ def run_vitest_benchmarking_tests(
656662
)
657663
else:
658664
logger.debug(f"[VITEST-BENCH] No perf END markers found in stdout (len={len(result.stdout)})")
659-
# Check if there are behavior END markers instead
660-
behavior_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:\d+:[^#]+######!")
661-
behavior_matches = list(behavior_end_pattern.finditer(result.stdout))
662-
if behavior_matches:
663-
logger.debug(f"[VITEST-BENCH] Found {len(behavior_matches)} behavior END markers instead (no duration)")
664665

665666
return result_file_path, result
666667

@@ -719,6 +720,7 @@ def run_vitest_line_profile_tests(
719720
"--reporter=default",
720721
"--reporter=junit",
721722
"--no-file-parallelism", # Serial execution for consistent line profiling
723+
"--pool=forks", # Use child processes so timing markers flow to parent stdout
722724
]
723725

724726
# Use codeflash vitest config to override restrictive include patterns

packages/codeflash/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "codeflash",
3-
"version": "0.10.0",
3+
"version": "0.10.1",
44
"description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",
55
"main": "runtime/index.js",
66
"types": "runtime/index.d.ts",

packages/codeflash/runtime/capture.js

Lines changed: 106 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,96 @@ if (RANDOM_SEED !== 0) {
266266
}
267267
}
268268

269-
// Current test context (set by Jest hooks)
269+
// Current test context (set by Jest hooks or Vitest worker)
270270
let currentTestName = null;
271271
let currentTestPath = null; // Test file path from Jest
272272

273+
/**
274+
* Get the current test name from Vitest's internal worker API.
275+
* Vitest doesn't inject beforeEach as a global, so the Jest-style hook doesn't fire.
276+
* Instead, we query Vitest's worker directly for the current test name.
277+
*
278+
* In Vitest's fork pool, `__vitest_worker__.current` is the current task object
279+
* with properties: name, fullName, fullTestName, suite, type, file, etc.
280+
* Also, `expect.getState().currentTestName` works from within a test.
281+
*
282+
* @returns {string|null} The current test name, or null if not in Vitest
283+
*/
284+
function getVitestTestName() {
285+
// Prefer expect.getState().currentTestName — returns full path including describe blocks
286+
// e.g., "Performance tests > should return true for basic HTML tags"
287+
// This matches what Jest's beforeEach hook would set.
288+
try {
289+
if (typeof expect !== 'undefined' && expect.getState) {
290+
const state = expect.getState();
291+
if (state?.currentTestName) {
292+
return state.currentTestName;
293+
}
294+
}
295+
} catch (e) {
296+
// expect not available
297+
}
298+
// Fallback: Vitest worker API — worker.current.fullTestName includes describe path
299+
try {
300+
const worker = globalThis.__vitest_worker__;
301+
if (worker?.current?.fullTestName) {
302+
return worker.current.fullTestName;
303+
}
304+
if (worker?.current?.fullName) {
305+
return worker.current.fullName;
306+
}
307+
if (worker?.current?.name) {
308+
return worker.current.name;
309+
}
310+
} catch (e) {
311+
// Not in Vitest context
312+
}
313+
return null;
314+
}
315+
316+
/**
317+
* Get the current test file path from Vitest's internal worker API.
318+
* @returns {string|null} The current test file path, or null if not in Vitest
319+
*/
320+
function getVitestTestPath() {
321+
try {
322+
const worker = globalThis.__vitest_worker__;
323+
if (worker?.filepath) {
324+
return worker.filepath;
325+
}
326+
} catch (e) {
327+
// Not in Vitest context
328+
}
329+
// Fallback: try expect.getState() for testPath
330+
try {
331+
if (typeof expect !== 'undefined' && expect.getState) {
332+
const state = expect.getState();
333+
if (state?.testPath) {
334+
return state.testPath;
335+
}
336+
}
337+
} catch (e) {
338+
// expect not available
339+
}
340+
return null;
341+
}
342+
343+
/**
344+
* Get the effective test name, trying Jest hooks first, then Vitest API, then fallback.
345+
* @returns {string} The current test name
346+
*/
347+
function getEffectiveTestName() {
348+
return currentTestName || getVitestTestName() || 'unknown';
349+
}
350+
351+
/**
352+
* Get the effective test path, trying Jest hooks first, then Vitest API, then fallback.
353+
* @returns {string|null} The current test file path
354+
*/
355+
function getEffectiveTestPath() {
356+
return currentTestPath || getVitestTestPath() || null;
357+
}
358+
273359
// Invocation counter map: tracks how many times each testId has been seen
274360
// Key: testId (testModule:testClass:testFunction:lineId:loopIndex)
275361
// Value: count (starts at 0, increments each time same key is seen)
@@ -549,13 +635,14 @@ function capture(funcName, lineId, fn, ...args) {
549635

550636
// Get test context (raw values for SQLite storage)
551637
// Use TEST_MODULE env var if set, otherwise derive from test file path
638+
const effectiveTestPath = getEffectiveTestPath();
552639
let testModulePath;
553640
if (TEST_MODULE) {
554641
testModulePath = TEST_MODULE;
555-
} else if (currentTestPath) {
642+
} else if (effectiveTestPath) {
556643
// Get relative path from cwd and convert to module-style path
557644
const path = require('path');
558-
const relativePath = path.relative(process.cwd(), currentTestPath);
645+
const relativePath = path.relative(process.cwd(), effectiveTestPath);
559646
// Convert to Python module-style path (e.g., "tests/test_foo.test.js" -> "tests.test_foo.test")
560647
// This matches what Jest's junit XML produces
561648
testModulePath = relativePath
@@ -564,10 +651,10 @@ function capture(funcName, lineId, fn, ...args) {
564651
.replace(/\.test$/, '.test') // Keep .test suffix
565652
.replace(/\//g, '.'); // Convert path separators to dots
566653
} else {
567-
testModulePath = currentTestName || 'unknown';
654+
testModulePath = getEffectiveTestName();
568655
}
569656
const testClassName = null; // Jest doesn't use classes like Python
570-
const testFunctionName = currentTestName || 'unknown';
657+
const testFunctionName = getEffectiveTestName();
571658

572659
// Sanitized versions for stdout tags (avoid regex conflicts)
573660
const safeModulePath = sanitizeTestId(testModulePath);
@@ -583,8 +670,8 @@ function capture(funcName, lineId, fn, ...args) {
583670
// Format stdout tag (matches Python format, uses sanitized names)
584671
const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${LOOP_INDEX}:${invocationId}`;
585672

586-
// Print start tag
587-
console.log(`!$######${testStdoutTag}######$!`);
673+
// Print start tag (use process.stdout.write to bypass test framework console interception)
674+
process.stdout.write(`!$######${testStdoutTag}######$!\n`);
588675

589676
// Timing with nanosecond precision
590677
const startTime = getTimeNs();
@@ -602,14 +689,14 @@ function capture(funcName, lineId, fn, ...args) {
602689
const durationNs = getDurationNs(startTime, endTime);
603690
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, resolved, null, durationNs);
604691
// Print end tag (no duration for behavior mode)
605-
console.log(`!######${testStdoutTag}######!`);
692+
process.stdout.write(`!######${testStdoutTag}######!\n`);
606693
return resolved;
607694
},
608695
(err) => {
609696
const endTime = getTimeNs();
610697
const durationNs = getDurationNs(startTime, endTime);
611698
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, null, err, durationNs);
612-
console.log(`!######${testStdoutTag}######!`);
699+
process.stdout.write(`!######${testStdoutTag}######!\n`);
613700
throw err;
614701
}
615702
);
@@ -623,7 +710,7 @@ function capture(funcName, lineId, fn, ...args) {
623710
recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, returnValue, error, durationNs);
624711

625712
// Print end tag (no duration for behavior mode, matching Python)
626-
console.log(`!######${testStdoutTag}######!`);
713+
process.stdout.write(`!######${testStdoutTag}######!\n`);
627714

628715
if (error) throw error;
629716
return returnValue;
@@ -656,22 +743,24 @@ function capturePerf(funcName, lineId, fn, ...args) {
656743
const shouldLoop = getPerfLoopCount() > 1 && !checkSharedTimeLimit();
657744

658745
// Get test context (computed once, reused across batch)
746+
// Uses Vitest worker API as fallback when Jest-style beforeEach hook doesn't fire
747+
const effectiveTestPath = getEffectiveTestPath();
659748
let testModulePath;
660749
if (TEST_MODULE) {
661750
testModulePath = TEST_MODULE;
662-
} else if (currentTestPath) {
751+
} else if (effectiveTestPath) {
663752
const path = require('path');
664-
const relativePath = path.relative(process.cwd(), currentTestPath);
753+
const relativePath = path.relative(process.cwd(), effectiveTestPath);
665754
testModulePath = relativePath
666755
.replace(/\\/g, '/')
667756
.replace(/\.js$/, '')
668757
.replace(/\.test$/, '.test')
669758
.replace(/\//g, '.');
670759
} else {
671-
testModulePath = currentTestName || 'unknown';
760+
testModulePath = getEffectiveTestName();
672761
}
673762
const testClassName = null;
674-
const testFunctionName = currentTestName || 'unknown';
763+
const testFunctionName = getEffectiveTestName();
675764

676765
const safeModulePath = sanitizeTestId(testModulePath);
677766
const safeTestFunctionName = sanitizeTestId(testFunctionName);
@@ -767,8 +856,8 @@ function capturePerf(funcName, lineId, fn, ...args) {
767856
lastError = e;
768857
}
769858

770-
// Print end tag with timing
771-
console.log(`!######${testStdoutTag}:${durationNs}######!`);
859+
// Print end tag with timing (use process.stdout.write to bypass test framework console interception)
860+
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
772861

773862
// Update shared loop counter
774863
sharedPerfState.totalLoopsCompleted++;
@@ -808,7 +897,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
808897
* @private
809898
*/
810899
function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
811-
console.log(`!######${testStdoutTag}:${durationNs}######!`);
900+
process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
812901
sharedPerfState.totalLoopsCompleted++;
813902
if (durationNs > 0) {
814903
runtimes.push(durationNs / 1000);

0 commit comments

Comments
 (0)