From 22111ed2eb15a662abea28e27e257ca0f7afa479 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Thu, 12 Feb 2026 16:43:56 +0000 Subject: [PATCH 1/2] Optimize StandaloneCallTransformer._find_balanced_parens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves a **26% runtime improvement** by replacing character-by-character iteration with regex-based scanning to find special characters (quotes, parentheses, backslashes). This optimization significantly reduces Python-level loop overhead by leveraging the compiled regex's C-level string scanning. **Key optimization:** - **Original approach:** Iterates through every character in the string (108,802 iterations in profiling), checking each one against quotes and parentheses - **Optimized approach:** Uses `self._special_char_re.search()` to jump directly to the next relevant character, reducing iterations from ~109K to ~16.5K (85% reduction) **Why this is faster:** The regex engine scans through irrelevant characters (letters, numbers, whitespace, operators) at C speed, only stopping at characters that matter for parenthesis balancing. The line profiler shows the main while loop went from 110,858 hits (18.6% of time) to just 18,571 hits (8.2% of time). **Performance characteristics by workload:** - **Best speedups (100%+ faster):** Large inputs with long stretches of non-special characters benefit most. Tests like `test_large_many_simple_arguments` (1655% faster) and `test_large_object_and_array_literals_complex` (1485% faster) show dramatic improvements because the regex can skip over lengthy argument lists and object literals in one jump. - **Moderate slowdowns (30-60%):** Small inputs with many special characters pay a regex overhead penalty. 
Each `regex.search()` call has a setup cost, so when special characters are frequent (e.g., `test_deeply_nested_parens_1000`, which is 73% slower), the optimization's benefits are negated. - **Trade-off sweet spot:** The optimization excels when the function is called on realistic JavaScript code with long argument lists, string literals, or object/array structures—common in test instrumentation scenarios. **Impact on workloads:** Given that `StandaloneCallTransformer` instruments JavaScript test code by finding function call boundaries, the typical use case involves parsing moderate-to-large code snippets with mixed content (strings, nested calls, object literals). The 26% average improvement suggests real-world code has enough non-special character sequences to benefit from regex scanning, making this optimization valuable for the hot path of JavaScript test instrumentation. --- codeflash/languages/javascript/instrument.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/codeflash/languages/javascript/instrument.py b/codeflash/languages/javascript/instrument.py index dee534044..e6639618b 100644 --- a/codeflash/languages/javascript/instrument.py +++ b/codeflash/languages/javascript/instrument.py @@ -128,6 +128,10 @@ def __init__(self, function_to_optimize: FunctionToOptimize, capture_func: str) rf"(\s*)(await\s+)?(\w+)\[['\"]({re.escape(self.func_name)})['\"]]\s*\(" ) + # Compiled regex to find the next character of interest (quotes, parentheses, backslash). + # This lets us skip large stretches of irrelevant characters in C instead of Python. 
+ self._special_char_re = re.compile(r'["\'`()\\]') + def transform(self, code: str) -> str: """Transform all standalone calls in the code.""" result: list[str] = [] @@ -327,13 +331,21 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N s_len = len(s) quotes = "\"'`" + special_re = self._special_char_re + + # Use regex to jump to the next special character (quote, parenthesis, backslash). + # This reduces Python-level iterations by leveraging C-level scanning. while pos < s_len and depth > 0: - char = s[pos] + m = special_re.search(s, pos) + if not m: + return None, -1 + i = m.start() + char = m.group(0) # Handle string literals # Note: preserve original escaping semantics (only checks immediate preceding char) if char in quotes: - prev_char = s[pos - 1] if pos > 0 else None + prev_char = s[i - 1] if i > 0 else None if prev_char != "\\": if not in_string: in_string = True @@ -347,7 +359,8 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N elif char == ")": depth -= 1 - pos += 1 + pos = i + 1 + if depth != 0: return None, -1 From 1786860808b0243a0f2d6467a9f81db16655c6f5 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 12 Feb 2026 16:46:24 +0000 Subject: [PATCH 2/2] style: auto-fix linting issues Co-Authored-By: Claude Opus 4.6 --- codeflash/languages/javascript/instrument.py | 1 - 1 file changed, 1 deletion(-) diff --git a/codeflash/languages/javascript/instrument.py b/codeflash/languages/javascript/instrument.py index e6639618b..174a0416c 100644 --- a/codeflash/languages/javascript/instrument.py +++ b/codeflash/languages/javascript/instrument.py @@ -361,7 +361,6 @@ def _find_balanced_parens(self, code: str, open_paren_pos: int) -> tuple[str | N pos = i + 1 - if depth != 0: return None, -1