fix: resolve Vitest benchmarking showing wall-clock time instead of per-function timing

Saga4 · claude · Saga4 · commit ed2594bfd1e6 · 2026-03-03T14:31:10.000+05:30
Root cause: Vitest performance tests reported "20.0 seconds over 1 loop"
(JUnit XML wall-clock fallback) instead of actual per-function nanosecond
timing. This was a chain of two issues:

1. **stdout interception**: Vitest's default `threads` pool intercepts
   process.stdout.write() and console.log(), preventing timing markers
   from flowing to the parent process. Fixed by adding `--pool=forks`
   to all Vitest commands and config files. The `forks` pool uses child
   processes where stdout flows directly to the parent.

2. **test name detection**: Even after markers flowed through (43,000+
   found in stdout), the parser couldn't match them to JUnit XML
   testcases because all markers had "unknown" as the test name. This
   happened because Vitest doesn't inject `beforeEach` as a global
   (unlike Jest), so capture.js's Jest-style hook to set
   `currentTestName` never fired.

   Fixed by adding Vitest-specific test name detection in capture.js:
   - Primary: `expect.getState().currentTestName` (full describe path)
   - Fallback: `__vitest_worker__.current.fullTestName`
   - Defense-in-depth: parser fallback matches "unknown" markers to
     the first testcase when no name match is found

Result: cheerio's `isHtml` went from "20.0s / 1 loop" to
"902μs / 20,853 loops" with proper speedup analysis.

Co-Authored-By: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;
diff --git a/codeflash/languages/javascript/parse.py b/codeflash/languages/javascript/parse.py
@@ -172,22 +172,12 @@ def parse_jest_test_xml(
             if global_stdout:
                 marker_count = len(jest_start_pattern.findall(global_stdout))
                 if marker_count > 0:
-                    logger.debug(f"Found {marker_count} timing start markers in Jest stdout")
-                else:
-                    logger.debug(f"No timing start markers found in Jest stdout (len={len(global_stdout)})")
-                # Check for END markers with duration (perf test markers)
+                    logger.debug(f"Found {marker_count} timing start markers in subprocess stdout")
                 end_marker_count = len(jest_end_pattern.findall(global_stdout))
                 if end_marker_count > 0:
-                    logger.debug(
-                        f"[PERF-DEBUG] Found {end_marker_count} END timing markers with duration in Jest stdout"
-                    )
-                    # Sample a few markers to verify loop indices
-                    end_samples = list(jest_end_pattern.finditer(global_stdout))[:5]
-                    for sample in end_samples:
-                        groups = sample.groups()
-                        logger.debug(f"[PERF-DEBUG] Sample END marker: loopIndex={groups[3]}, duration={groups[5]}")
+                    logger.debug(f"Found {end_marker_count} END timing markers with duration in subprocess stdout")
                 else:
-                    logger.debug("[PERF-DEBUG] No END markers with duration found in Jest stdout")
+                    logger.debug(f"No END markers found in subprocess stdout (len={len(global_stdout)})")
         except (AttributeError, UnicodeDecodeError):
             global_stdout = ""
 
@@ -215,6 +205,10 @@ def parse_jest_test_xml(
             # Key: (testName, testName2, funcName, loopIndex, lineId)
             key = match.groups()[:5]
             end_matches_dict[key] = match
+        logger.debug(
+            f"Suite {suite_count}: combined_stdout len={len(combined_stdout)}, "
+            f"start_matches={len(start_matches)}, end_matches={len(end_matches_dict)}"
+        )
 
         # Debug: log suite-level END marker parsing for perf tests
         if end_matches_dict:
@@ -371,6 +365,25 @@ def parse_jest_test_xml(
                     # end_key is (module, testName, funcName, loopIndex, invocationId)
                     if len(end_key) >= 2 and sanitized_test_name in end_key[1]:
                         matching_ends_direct.append(end_match)
+
+                # Fallback: If no matches found but END markers exist with "unknown" test name
+                # (happens in Vitest where beforeEach hook doesn't fire to set currentTestName),
+                # match ALL "unknown" markers to this testcase. Use a consumed set to avoid
+                # assigning the same marker to multiple testcases.
+                if not matching_ends_direct and end_matches_dict:
+                    unknown_markers = [(k, m) for k, m in end_matches_dict.items() if len(k) >= 2 and k[1] == "unknown"]
+                    if unknown_markers:
+                        # Assign all unconsumed unknown markers to this testcase
+                        for _, end_match in unknown_markers:
+                            matching_ends_direct.append(end_match)
+                        # Remove consumed markers so they aren't double-assigned to other testcases
+                        for end_key, _ in unknown_markers:
+                            end_matches_dict.pop(end_key, None)
+                        logger.debug(
+                            f"[PERF-UNKNOWN-MATCH] Testcase '{test_name[:40]}': matched {len(matching_ends_direct)} "
+                            f"'unknown' END markers (Vitest fallback)"
+                        )
+
                 # Debug: log matching results for perf tests
                 if matching_ends_direct:
                     loop_indices = [int(m.groups()[3]) if m.groups()[3].isdigit() else 1 for m in matching_ends_direct]
diff --git a/codeflash/languages/javascript/vitest_runner.py b/codeflash/languages/javascript/vitest_runner.py
@@ -225,6 +225,8 @@ def _ensure_codeflash_vitest_config(project_root: Path) -> Path | None:
   test: {{
     // Override include pattern to match all test files including generated ones
     include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
+    // Use forks pool so timing markers from process.stdout.write flow to parent stdout
+    pool: 'forks',
   }},
 }});
 """
@@ -239,6 +241,8 @@ def _ensure_codeflash_vitest_config(project_root: Path) -> Path | None:
     include: ['**/*.test.ts', '**/*.test.js', '**/*.test.tsx', '**/*.test.jsx'],
     // Exclude common non-test directories
     exclude: ['**/node_modules/**', '**/dist/**'],
+    // Use forks pool so timing markers from process.stdout.write flow to parent stdout
+    pool: 'forks',
   },
 });
 """
@@ -277,6 +281,7 @@ def _build_vitest_behavioral_command(
         "--reporter=default",
         "--reporter=junit",
         "--no-file-parallelism",  # Serial execution for deterministic timing
+        "--pool=forks",  # Use child processes so timing markers flow to parent stdout
     ]
 
     # For monorepos with restrictive vitest configs (e.g., include: test/**/*.test.ts),
@@ -326,6 +331,7 @@ def _build_vitest_benchmarking_command(
         "--reporter=default",
         "--reporter=junit",
         "--no-file-parallelism",  # Serial execution for consistent benchmarking
+        "--pool=forks",  # Use child processes so timing markers flow to parent stdout
     ]
 
     # Use codeflash vitest config to override restrictive include patterns
@@ -656,11 +662,6 @@ def run_vitest_benchmarking_tests(
             )
         else:
             logger.debug(f"[VITEST-BENCH] No perf END markers found in stdout (len={len(result.stdout)})")
-            # Check if there are behavior END markers instead
-            behavior_end_pattern = re.compile(r"!######[^:]+:[^:]+:[^:]+:\d+:[^#]+######!")
-            behavior_matches = list(behavior_end_pattern.finditer(result.stdout))
-            if behavior_matches:
-                logger.debug(f"[VITEST-BENCH] Found {len(behavior_matches)} behavior END markers instead (no duration)")
 
     return result_file_path, result
 
@@ -719,6 +720,7 @@ def run_vitest_line_profile_tests(
         "--reporter=default",
         "--reporter=junit",
         "--no-file-parallelism",  # Serial execution for consistent line profiling
+        "--pool=forks",  # Use child processes so timing markers flow to parent stdout
     ]
 
     # Use codeflash vitest config to override restrictive include patterns
diff --git a/packages/codeflash/package.json b/packages/codeflash/package.json
@@ -1,6 +1,6 @@
 {
   "name": "codeflash",
-  "version": "0.10.0",
+  "version": "0.10.1",
   "description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",
   "main": "runtime/index.js",
   "types": "runtime/index.d.ts",
diff --git a/packages/codeflash/runtime/capture.js b/packages/codeflash/runtime/capture.js
@@ -266,10 +266,96 @@ if (RANDOM_SEED !== 0) {
     }
 }
 
-// Current test context (set by Jest hooks)
+// Current test context (set by Jest hooks or Vitest worker)
 let currentTestName = null;
 let currentTestPath = null;  // Test file path from Jest
 
+/**
+ * Get the current test name from Vitest's internal worker API.
+ * Vitest doesn't inject beforeEach as a global, so the Jest-style hook doesn't fire.
+ * Instead, we query Vitest's worker directly for the current test name.
+ *
+ * In Vitest's fork pool, `__vitest_worker__.current` is the current task object
+ * with properties: name, fullName, fullTestName, suite, type, file, etc.
+ * Also, `expect.getState().currentTestName` works from within a test.
+ *
+ * @returns {string|null} The current test name, or null if not in Vitest
+ */
+function getVitestTestName() {
+    // Prefer expect.getState().currentTestName — returns full path including describe blocks
+    // e.g., "Performance tests > should return true for basic HTML tags"
+    // This matches what Jest's beforeEach hook would set.
+    try {
+        if (typeof expect !== 'undefined' && expect.getState) {
+            const state = expect.getState();
+            if (state?.currentTestName) {
+                return state.currentTestName;
+            }
+        }
+    } catch (e) {
+        // expect not available
+    }
+    // Fallback: Vitest worker API — worker.current.fullTestName includes describe path
+    try {
+        const worker = globalThis.__vitest_worker__;
+        if (worker?.current?.fullTestName) {
+            return worker.current.fullTestName;
+        }
+        if (worker?.current?.fullName) {
+            return worker.current.fullName;
+        }
+        if (worker?.current?.name) {
+            return worker.current.name;
+        }
+    } catch (e) {
+        // Not in Vitest context
+    }
+    return null;
+}
+
+/**
+ * Get the current test file path from Vitest's internal worker API.
+ * @returns {string|null} The current test file path, or null if not in Vitest
+ */
+function getVitestTestPath() {
+    try {
+        const worker = globalThis.__vitest_worker__;
+        if (worker?.filepath) {
+            return worker.filepath;
+        }
+    } catch (e) {
+        // Not in Vitest context
+    }
+    // Fallback: try expect.getState() for testPath
+    try {
+        if (typeof expect !== 'undefined' && expect.getState) {
+            const state = expect.getState();
+            if (state?.testPath) {
+                return state.testPath;
+            }
+        }
+    } catch (e) {
+        // expect not available
+    }
+    return null;
+}
+
+/**
+ * Get the effective test name, trying Jest hooks first, then Vitest API, then fallback.
+ * @returns {string} The current test name
+ */
+function getEffectiveTestName() {
+    return currentTestName || getVitestTestName() || 'unknown';
+}
+
+/**
+ * Get the effective test path, trying Jest hooks first, then Vitest API, then fallback.
+ * @returns {string|null} The current test file path
+ */
+function getEffectiveTestPath() {
+    return currentTestPath || getVitestTestPath() || null;
+}
+
 // Invocation counter map: tracks how many times each testId has been seen
 // Key: testId (testModule:testClass:testFunction:lineId:loopIndex)
 // Value: count (starts at 0, increments each time same key is seen)
@@ -549,13 +635,14 @@ function capture(funcName, lineId, fn, ...args) {
 
     // Get test context (raw values for SQLite storage)
     // Use TEST_MODULE env var if set, otherwise derive from test file path
+    const effectiveTestPath = getEffectiveTestPath();
     let testModulePath;
     if (TEST_MODULE) {
         testModulePath = TEST_MODULE;
-    } else if (currentTestPath) {
+    } else if (effectiveTestPath) {
         // Get relative path from cwd and convert to module-style path
         const path = require('path');
-        const relativePath = path.relative(process.cwd(), currentTestPath);
+        const relativePath = path.relative(process.cwd(), effectiveTestPath);
         // Convert to Python module-style path (e.g., "tests/test_foo.test.js" -> "tests.test_foo.test")
         // This matches what Jest's junit XML produces
         testModulePath = relativePath
@@ -564,10 +651,10 @@ function capture(funcName, lineId, fn, ...args) {
             .replace(/\.test$/, '.test') // Keep .test suffix
             .replace(/\//g, '.');       // Convert path separators to dots
     } else {
-        testModulePath = currentTestName || 'unknown';
+        testModulePath = getEffectiveTestName();
     }
     const testClassName = null;  // Jest doesn't use classes like Python
-    const testFunctionName = currentTestName || 'unknown';
+    const testFunctionName = getEffectiveTestName();
 
     // Sanitized versions for stdout tags (avoid regex conflicts)
     const safeModulePath = sanitizeTestId(testModulePath);
@@ -583,8 +670,8 @@ function capture(funcName, lineId, fn, ...args) {
     // Format stdout tag (matches Python format, uses sanitized names)
     const testStdoutTag = `${safeModulePath}:${testClassName ? testClassName + '.' : ''}${safeTestFunctionName}:${funcName}:${LOOP_INDEX}:${invocationId}`;
 
-    // Print start tag
-    console.log(`!$######${testStdoutTag}######$!`);
+    // Print start tag (use process.stdout.write to bypass test framework console interception)
+    process.stdout.write(`!$######${testStdoutTag}######$!\n`);
 
     // Timing with nanosecond precision
     const startTime = getTimeNs();
@@ -602,14 +689,14 @@ function capture(funcName, lineId, fn, ...args) {
                     const durationNs = getDurationNs(startTime, endTime);
                     recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, resolved, null, durationNs);
                     // Print end tag (no duration for behavior mode)
-                    console.log(`!######${testStdoutTag}######!`);
+                    process.stdout.write(`!######${testStdoutTag}######!\n`);
                     return resolved;
                 },
                 (err) => {
                     const endTime = getTimeNs();
                     const durationNs = getDurationNs(startTime, endTime);
                     recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, null, err, durationNs);
-                    console.log(`!######${testStdoutTag}######!`);
+                    process.stdout.write(`!######${testStdoutTag}######!\n`);
                     throw err;
                 }
             );
@@ -623,7 +710,7 @@ function capture(funcName, lineId, fn, ...args) {
     recordResult(testModulePath, testClassName, testFunctionName, funcName, invocationId, args, returnValue, error, durationNs);
 
     // Print end tag (no duration for behavior mode, matching Python)
-    console.log(`!######${testStdoutTag}######!`);
+    process.stdout.write(`!######${testStdoutTag}######!\n`);
 
     if (error) throw error;
     return returnValue;
@@ -656,22 +743,24 @@ function capturePerf(funcName, lineId, fn, ...args) {
     const shouldLoop = getPerfLoopCount() > 1 && !checkSharedTimeLimit();
 
     // Get test context (computed once, reused across batch)
+    // Uses Vitest worker API as fallback when Jest-style beforeEach hook doesn't fire
+    const effectiveTestPath = getEffectiveTestPath();
     let testModulePath;
     if (TEST_MODULE) {
         testModulePath = TEST_MODULE;
-    } else if (currentTestPath) {
+    } else if (effectiveTestPath) {
         const path = require('path');
-        const relativePath = path.relative(process.cwd(), currentTestPath);
+        const relativePath = path.relative(process.cwd(), effectiveTestPath);
         testModulePath = relativePath
             .replace(/\\/g, '/')
             .replace(/\.js$/, '')
             .replace(/\.test$/, '.test')
             .replace(/\//g, '.');
     } else {
-        testModulePath = currentTestName || 'unknown';
+        testModulePath = getEffectiveTestName();
     }
     const testClassName = null;
-    const testFunctionName = currentTestName || 'unknown';
+    const testFunctionName = getEffectiveTestName();
 
     const safeModulePath = sanitizeTestId(testModulePath);
     const safeTestFunctionName = sanitizeTestId(testFunctionName);
@@ -767,8 +856,8 @@ function capturePerf(funcName, lineId, fn, ...args) {
             lastError = e;
         }
 
-        // Print end tag with timing
-        console.log(`!######${testStdoutTag}:${durationNs}######!`);
+        // Print end tag with timing (use process.stdout.write to bypass test framework console interception)
+        process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
 
         // Update shared loop counter
         sharedPerfState.totalLoopsCompleted++;
@@ -808,7 +897,7 @@ function capturePerf(funcName, lineId, fn, ...args) {
  * @private
  */
 function _recordAsyncTiming(startTime, testStdoutTag, durationNs, runtimes) {
-    console.log(`!######${testStdoutTag}:${durationNs}######!`);
+    process.stdout.write(`!######${testStdoutTag}:${durationNs}######!\n`);
     sharedPerfState.totalLoopsCompleted++;
     if (durationNs > 0) {
         runtimes.push(durationNs / 1000);
diff --git a/tests/test_languages/test_mocha_runner.py b/tests/test_languages/test_mocha_runner.py

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "codeflash",`
`3`		`- "version": "0.10.0",`
	`3`	`+ "version": "0.10.1",`
`4`	`4`	`"description": "Codeflash - AI-powered code optimization for JavaScript and TypeScript",`
`5`	`5`	`"main": "runtime/index.js",`
`6`	`6`	`"types": "runtime/index.d.ts",`