Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 69 additions & 2 deletions codeflash/languages/javascript/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from __future__ import annotations

import bisect
import re
from dataclasses import dataclass
from pathlib import Path
Expand Down Expand Up @@ -487,14 +488,18 @@ def transform(self, code: str) -> str:
result: list[str] = []
pos = 0

while pos < len(code):
# Precompute string spans once per transform invocation for fast lookups
starts, ends = self._compute_string_spans(code)

n = len(code)
while pos < n:
match = self._expect_pattern.search(code, pos)
if not match:
result.append(code[pos:])
break

# Skip if inside a string literal (e.g., test description)
if is_inside_string(code, match.start()):
if self._pos_inside_spans(match.start(), starts, ends):
result.append(code[pos : match.end()])
pos = match.end()
continue
Expand Down Expand Up @@ -729,6 +734,68 @@ def _generate_transformed_call(self, match: ExpectCallMatch) -> str:
f"'{line_id}', {func_ref})){match.assertion_chain}{semicolon}"
)

def _compute_string_spans(self, code: str) -> tuple[list[int], list[int]]:
r"""Compute inclusive/exclusive spans for string contents.

Spans represent the region of code that would be considered 'inside' a string
by the original is_inside_string behavior: start is the first position after
the opening quote, end is the position after the closing quote (or len(code)
if no closing quote is found). A position pos is considered inside a string
if start <= pos < end.

This function handles escapes (\\) similarly to the original function and
treats backticks (`) like other quotes (no special ${} handling to match original).
"""
starts: list[int] = []
ends: list[int] = []
i = 0
n = len(code)

while i < n:
ch = code[i]
if ch in "\"'`":
# Determine if this quote is escaped
# A quote is escaped if the preceding char is an odd number of backslashes.
# Simpler: check immediate preceding char only (original code only checked immediate preceding char).
# To preserve behavior, we follow original: only treat as escaped if code[i-1] == '\\'
if i > 0 and code[i - 1] == "\\":
i += 1
continue

start = i + 1 # start of content (pos equal to quote itself is considered outside)
quote_char = ch
i += 1
# Scan until closing quote or end
while i < n:
c = code[i]
if c == "\\" and i + 1 < n:
i += 2
continue
if c == quote_char:
i += 1
break
i += 1
end = i # position after closing quote or n if not closed
Comment on lines +767 to +778
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚡️ Codeflash found a 10% (0.10x) speedup for ExpectCallTransformer._compute_string_spans in codeflash/languages/javascript/instrument.py

⏱️ Runtime: 725 microseconds → 656 microseconds (best of 208 runs)

📝 Explanation and details

The optimized code achieves a 10% runtime improvement by replacing the character-by-character scanning for closing quotes with Python's built-in str.find() method, which is implemented in C and significantly faster for string searches.

Key optimization:
Instead of manually iterating through each character to find the closing quote:

# Original: Manual character iteration
while i < n:
    c = code[i]
    if c == "\\" and i + 1 < n:
        i += 2
        continue
    if c == quote_char:
        i += 1
        break
    i += 1

The optimized version uses str.find() to jump directly to candidate closing quotes, then counts preceding backslashes to determine if the quote is escaped:

# Optimized: Use find() + backslash counting
while True:
    q = code.find(quote_char, search_pos)
    if q == -1:
        end = n
        i = n
        break
    
    # Count backslashes before the quote
    p = q - 1
    backslash_count = 0
    while p >= start and code[p] == "\\":
        backslash_count += 1
        p -= 1
    
    # Odd number of backslashes means quote is escaped
    if backslash_count % 2 == 1:
        search_pos = q + 1
        continue
    
    end = q + 1
    i = end
    break

Why this is faster:

  1. Leverages C-level string operations: str.find() is implemented in C and optimized for substring searching, eliminating Python bytecode overhead for each character check
  2. Reduces iteration count: Instead of checking every character between quotes, we jump directly to quote candidates and only examine backslashes immediately before them
  3. Most effective for longer strings: The line profiler shows the inner while loop consumed ~40% of runtime in the original (lines with 13.7%, 13.1%, 15.2% time). The optimization dramatically reduces this overhead.

Test results confirm the optimization pattern:

  • Short strings (≤20 chars): Slight regression (0-38% slower) due to additional overhead of find() setup
  • Medium strings (100-1000 chars): Moderate gains (32-188% faster)
  • Long strings (1000+ chars): Massive gains (2519-2843% faster) as the benefit of skipping character-by-character iteration compounds

This optimization is particularly valuable for JavaScript test instrumentation where string literals can be lengthy (code snippets, error messages, template literals), making the overall 10% runtime improvement primarily driven by these longer string cases.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 84 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 92.6%
🌀 Click to see Generated Regression Tests
import pytest
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.languages.javascript.instrument import ExpectCallTransformer

# Helper function to create a FunctionToOptimize instance for testing
def create_function_to_optimize(func_name: str = "testFunc") -> FunctionToOptimize:
    """Create a minimal FunctionToOptimize instance for testing."""
    return FunctionToOptimize(
        function_name=func_name,
        qualified_name=f"module.{func_name}",
        file_path="test.js",
        start_line=1,
        end_line=10
    )

def test_empty_code_string():
    """Test with empty code string - should return empty lists."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    starts, ends = transformer._compute_string_spans("") # 631ns -> 641ns (1.56% slower)

def test_code_without_strings():
    """Test with code containing no string literals - should return empty lists."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = "const x = 42; let y = x + 1; return y;"
    starts, ends = transformer._compute_string_spans(code) # 3.11μs -> 3.14μs (0.957% slower)

def test_single_double_quoted_string():
    """Test with a single double-quoted string."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"hello"'
    starts, ends = transformer._compute_string_spans(code) # 1.79μs -> 1.99μs (10.0% slower)

def test_single_single_quoted_string():
    """Test with a single single-quoted string."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = "'world'"
    starts, ends = transformer._compute_string_spans(code) # 1.68μs -> 1.95μs (13.8% slower)

def test_single_backtick_string():
    """Test with a single backtick-quoted string (template literal)."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = "`template`"
    starts, ends = transformer._compute_string_spans(code) # 1.85μs -> 1.98μs (6.60% slower)

def test_multiple_strings():
    """Test with multiple strings in code."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"first" + \'second\' + `third`'
    starts, ends = transformer._compute_string_spans(code) # 3.69μs -> 3.85μs (4.16% slower)

def test_empty_string():
    """Test with an empty string literal."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '""'
    starts, ends = transformer._compute_string_spans(code) # 1.26μs -> 1.81μs (30.3% slower)

def test_string_with_escaped_quote_inside():
    """Test string containing escaped quotes of the same type."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"hello\"world"'
    starts, ends = transformer._compute_string_spans(code) # 2.10μs -> 2.56μs (17.9% slower)

def test_string_with_different_escaped_quote_inside():
    """Test string with escaped quote of different type inside."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"hello\'world"'
    starts, ends = transformer._compute_string_spans(code) # 2.17μs -> 1.96μs (10.7% faster)

def test_string_starting_with_escaped_backslash():
    """Test string that contains escaped backslash."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"hello\\world"'
    starts, ends = transformer._compute_string_spans(code) # 2.08μs -> 1.92μs (8.32% faster)

def test_string_with_newline_escape():
    """Test string with escaped newline (\\n)."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"line1\nline2"'
    starts, ends = transformer._compute_string_spans(code) # 2.23μs -> 1.90μs (17.3% faster)

def test_mixed_quote_types():
    """Test that different quote types don't interfere."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = """"inside single 'quotes' here"""
    starts, ends = transformer._compute_string_spans(code) # 2.85μs -> 1.61μs (77.1% faster)

def test_unclosed_string_at_end():
    """Test with unclosed string at end of code."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = 'const x = "unclosed'
    starts, ends = transformer._compute_string_spans(code) # 2.56μs -> 2.48μs (2.86% faster)

def test_escaped_quote_at_start_of_string():
    """Test with escaped quote at the very start of string content."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"\"hello"'
    starts, ends = transformer._compute_string_spans(code) # 1.78μs -> 2.56μs (30.2% slower)

def test_escaped_quote_at_end_of_string():
    """Test with escaped quote at the very end of string content."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"hello\""'
    starts, ends = transformer._compute_string_spans(code) # 1.80μs -> 2.56μs (29.4% slower)

def test_adjacent_strings():
    """Test with adjacent strings (no spaces between them)."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"first""second"'
    starts, ends = transformer._compute_string_spans(code) # 2.60μs -> 2.69μs (3.02% slower)

def test_backslash_at_end_of_code():
    """Test with backslash at the very end of code (no character after)."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"text\\'
    starts, ends = transformer._compute_string_spans(code) # 1.69μs -> 1.55μs (8.94% faster)

def test_single_backslash_outside_string():
    """Test backslash outside of string context."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = 'x = \\n "hello"'
    starts, ends = transformer._compute_string_spans(code) # 2.29μs -> 2.62μs (13.0% slower)

def test_escaped_backslash_before_quote():
    """Test escaped backslash immediately before a quote character."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'x = "\\"'
    starts, ends = transformer._compute_string_spans(code) # 1.90μs -> 2.67μs (28.8% slower)

def test_multiple_consecutive_escapes():
    """Test multiple consecutive escape sequences."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"\\\\\\n"'
    starts, ends = transformer._compute_string_spans(code) # 1.76μs -> 1.99μs (11.5% slower)

def test_quote_escaped_by_preceding_backslash():
    """Test that a quote preceded by single backslash is not treated as string terminator."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"text\" more"'
    starts, ends = transformer._compute_string_spans(code) # 2.11μs -> 2.54μs (16.9% slower)

def test_three_consecutive_backslashes():
    """Test three consecutive backslashes (second and third form escape pair)."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"text\\\""'
    starts, ends = transformer._compute_string_spans(code) # 1.86μs -> 2.69μs (30.8% slower)

def test_all_three_quote_types_in_sequence():
    """Test code containing all three quote types in sequence."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"a" + \'b\' + `c`'
    starts, ends = transformer._compute_string_spans(code) # 2.88μs -> 3.79μs (24.1% slower)

def test_string_with_only_escaped_backslash():
    """Test string containing only escaped backslash."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"\\"'
    starts, ends = transformer._compute_string_spans(code) # 1.34μs -> 2.16μs (37.9% slower)

def test_code_with_string_at_various_positions():
    """Test strings at beginning, middle, and end of code."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"start" + middle + "end"'
    starts, ends = transformer._compute_string_spans(code) # 3.10μs -> 3.59μs (13.7% slower)

def test_quote_preceded_by_non_backslash():
    """Test that quotes preceded by non-backslash characters are not treated as escaped."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = 'a"hello"b'
    starts, ends = transformer._compute_string_spans(code) # 2.01μs -> 2.30μs (12.6% slower)

def test_very_long_string():
    """Test with a very long string to ensure proper span calculation."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    long_content = "x" * 1000
    code = f'"{long_content}"'
    starts, ends = transformer._compute_string_spans(code) # 59.3μs -> 2.26μs (2519% faster)

def test_string_with_regex_like_pattern():
    """Test string containing regex-like content (should be treated as plain string)."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"[a-z]+\d{2}"'
    starts, ends = transformer._compute_string_spans(code) # 2.22μs -> 1.93μs (15.0% faster)

def test_nested_quote_pairs():
    """Test that quotes of different types can appear inside strings."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '''"`'nested'`"'''
    starts, ends = transformer._compute_string_spans(code) # 1.88μs -> 1.98μs (4.99% slower)

def test_many_short_strings():
    """Test with 100 short strings to verify scalability."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Build code with many short strings: "a" + "b" + "c" + ...
    parts = [f'"{chr(97 + i % 26)}"' for i in range(100)]
    code = " + ".join(parts)
    starts, ends = transformer._compute_string_spans(code) # 46.0μs -> 53.3μs (13.8% slower)
    # All strings should have same length (3 chars each, content span 1-3)
    for i, (start, end) in enumerate(zip(starts, ends)):
        pass

def test_many_long_strings():
    """Test with 50 longer strings."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Create 50 strings of 20 characters each
    parts = [f'"{chr(97 + i % 26) * 20}"' for i in range(50)]
    code = " + ".join(parts)
    starts, ends = transformer._compute_string_spans(code) # 80.3μs -> 27.9μs (188% faster)
    # Each string has 20 characters of content
    for start, end in zip(starts, ends):
        pass

def test_alternating_quote_types_large():
    """Test with 100 strings alternating between three quote types."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    quotes = ['"', "'", "`"]
    parts = []
    for i in range(100):
        quote = quotes[i % 3]
        content = f"str{i}"
        parts.append(f"{quote}{content}{quote}")
    code = " + ".join(parts)
    starts, ends = transformer._compute_string_spans(code) # 73.0μs -> 54.6μs (33.8% faster)

def test_dense_escaped_sequences():
    """Test string with many consecutive escaped characters."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Create a string with many escaped characters: "\\" repeated
    code = '"' + (r'\\' * 100) + '"'
    starts, ends = transformer._compute_string_spans(code) # 7.42μs -> 13.2μs (43.8% slower)

def test_alternating_escaped_and_unescaped():
    """Test string alternating between escaped and unescaped patterns."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Pattern: \\ + regular char + \\ + regular char ... (1000 iterations)
    pattern = (r'\\' + 'x') * 500
    code = f'"{pattern}"'
    starts, ends = transformer._compute_string_spans(code) # 65.2μs -> 2.21μs (2843% faster)

def test_many_strings_with_mixed_escapes():
    """Test 100 strings each containing various escape sequences."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    parts = []
    for i in range(100):
        # Each string: "before\nafter"
        escaped_content = f"line{i}" + r"\n" + f"line{i+1}"
        parts.append(f'"{escaped_content}"')
    code = " + ".join(parts)
    starts, ends = transformer._compute_string_spans(code) # 122μs -> 53.9μs (127% faster)

def test_deeply_nested_quote_patterns():
    """Test code with multiple levels of string nesting patterns."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Create a pattern like: "a'b`c'd"e'f"
    # Each quote type is used inside other quote types
    code = '''"a'b`c'd"e'f"g`h'i"j'''
    starts, ends = transformer._compute_string_spans(code) # 3.17μs -> 3.49μs (9.21% slower)

def test_1000_character_code_with_strings():
    """Test with ~1000 character code containing multiple strings."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Build code: const str1 = "content1"; const str2 = "content2"; ... (repeated)
    code_parts = []
    for i in range(50):
        code_parts.append(f'const str{i} = "content{i}";')
    code = " ".join(code_parts)
    starts, ends = transformer._compute_string_spans(code) # 85.8μs -> 65.0μs (32.0% faster)

def test_string_positions_validity():
    """Test that all computed string positions are valid indices."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = 'const a = "first"; let b = \'second\'; var c = `third`;'
    starts, ends = transformer._compute_string_spans(code) # 5.06μs -> 5.30μs (4.53% slower)
    # Verify all positions are valid
    for start, end in zip(starts, ends):
        pass

def test_string_contents_extraction():
    """Test that computed spans correctly identify string contents."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = 'var x = "hello world";'
    starts, ends = transformer._compute_string_spans(code) # 2.75μs -> 2.71μs (1.10% faster)
    # Extract using computed spans and verify
    for start, end in zip(starts, ends):
        content = code[start:end]

def test_performance_with_many_escaped_quotes():
    """Test performance with code containing many escaped quotes."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    # Create 100 strings, each with 10 escaped quotes inside
    parts = []
    for i in range(100):
        content = r'\"' * 10
        parts.append(f'"{content}"')
    code = " + ".join(parts)
    # Should complete quickly even with complex escape handling
    starts, ends = transformer._compute_string_spans(code) # 111μs -> 301μs (63.2% slower)

def test_quote_at_position_zero():
    """Test string that starts at position 0."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = '"start of code'
    starts, ends = transformer._compute_string_spans(code) # 2.11μs -> 1.63μs (29.5% faster)

def test_consecutive_escape_pairs_at_boundary():
    """Test escape sequences right at string boundaries."""
    func_opt = create_function_to_optimize()
    transformer = ExpectCallTransformer(func_opt, "capture")
    code = r'"\\\\\\\\\\\\\\\\\\\\\"'  # Many escaped backslashes before closing quote
    starts, ends = transformer._compute_string_spans(code) # 1.99μs -> 3.58μs (44.3% slower)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To test or edit this optimization locally, run: git merge codeflash/optimize-pr1469-2026-02-12T17.18.54

Click to see suggested changes
Suggested change
i += 1
# Scan until closing quote or end
while i < n:
c = code[i]
if c == "\\" and i + 1 < n:
i += 2
continue
if c == quote_char:
i += 1
break
i += 1
end = i # position after closing quote or n if not closed
search_pos = start
while True:
q = code.find(quote_char, search_pos)
if q == -1:
end = n
i = n
break
p = q - 1
backslash_count = 0
while p >= start and code[p] == "\\":
backslash_count += 1
p -= 1
if backslash_count % 2 == 1:
search_pos = q + 1
continue
end = q + 1
i = end
break

Static Badge

starts.append(start)
ends.append(end)
else:
i += 1

return starts, ends

def _pos_inside_spans(self, pos: int, starts: list[int], ends: list[int]) -> bool:
"""Return True if pos is inside any (start, end) span using binary search.

Spans are expected to be non-overlapping and sorted by start.
"""
if not starts:
return False
idx = bisect.bisect_right(starts, pos)
if idx == 0:
return False
si = idx - 1
return pos < ends[si]


def transform_expect_calls(
code: str, function_to_optimize: FunctionToOptimize, capture_func: str, remove_assertions: bool = False
Expand Down
Loading