From 17f92911394c4d8a1c5027cfeb672afc6e5c7064 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 3 Jun 2025 21:03:58 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`g?= =?UTF-8?q?et=5Fdiff=5Flines=5Fcount`=20by=2019%=20in=20PR=20#274=20(`skip?= =?UTF-8?q?-formatting-for-large-diffs`)=20Here=20is=20a=20**much=20faster?= =?UTF-8?q?**=20rewrite.=20The=20biggest=20bottleneck=20was=20constructing?= =?UTF-8?q?=20the=20entire=20`diff=5Flines`=20list=20just=20to=20count=20i?= =?UTF-8?q?ts=20length.=20Instead,=20loop=20directly=20through=20the=20lin?= =?UTF-8?q?es=20and=20count=20matching=20lines,=20avoiding=20extra=20memor?= =?UTF-8?q?y=20and=20function=20call=20overhead.=20This=20also=20removes?= =?UTF-8?q?=20the=20small=20overhead=20of=20the=20nested=20function.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Optimizations made - **No filtered-list allocation:** Matching lines are now counted in a single pass instead of first being collected into a `diff_lines` list (the list returned by `split` is still built). - **No inner function call:** The nested `is_diff_line` helper is replaced by inline string checks, which is faster. - **Short-circuit on empty lines:** Empty lines are skipped before the first character is indexed. - **Direct character comparison for '+' and '-':** Comparing the first character directly is faster than tuple membership or `startswith` with a tuple; `startswith` is then called only on candidate lines to exclude the `+++`/`---` file headers. This reduces both runtime **and** memory usage by avoiding one intermediate list. 
--- codeflash/code_utils/formatter.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/codeflash/code_utils/formatter.py b/codeflash/code_utils/formatter.py index e1d269aa7..debd87106 100644 --- a/codeflash/code_utils/formatter.py +++ b/codeflash/code_utils/formatter.py @@ -4,6 +4,7 @@ import shlex import subprocess from typing import TYPE_CHECKING, Optional + import isort from codeflash.cli_cmds.console import console, logger @@ -11,12 +12,14 @@ if TYPE_CHECKING: from pathlib import Path + def get_nth_line(text: str, n: int) -> str | None: for i, line in enumerate(text.splitlines(), start=1): if i == n: return line return None + def get_diff_output(cmd: list[str]) -> Optional[str]: try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) @@ -35,25 +38,33 @@ def get_diff_output(cmd: list[str]) -> Optional[str]: def get_diff_lines_output_by_black(filepath: str) -> Optional[str]: try: import black # type: ignore - return get_diff_output(['black', '--diff', filepath]) + + return get_diff_output(["black", "--diff", filepath]) except ImportError: return None + def get_diff_lines_output_by_ruff(filepath: str) -> Optional[str]: try: import ruff # type: ignore - return get_diff_output(['ruff', 'format', '--diff', filepath]) + + return get_diff_output(["ruff", "format", "--diff", filepath]) except ImportError: print("can't import ruff") return None def get_diff_lines_count(diff_output: str) -> int: - lines = diff_output.split('\n') - def is_diff_line(line: str) -> bool: - return line.startswith(('+', '-')) and not line.startswith(('+++', '---')) - diff_lines = [line for line in lines if is_diff_line(line)] - return len(diff_lines) + # Count the number of diff lines in the given diff_output string + count = 0 + for line in diff_output.split("\n"): + if line: + c = line[0] + # Check first character and avoid lines starting with '+++', '---' + if (c == "+" or c == "-") and not (line.startswith("+++") or 
line.startswith("---")): + count += 1 + return count + def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: diff_changes_stdout = None @@ -66,15 +77,15 @@ def is_safe_to_format(filepath: str, max_diff_lines: int = 100) -> bool: if diff_changes_stdout is None: logger.warning("Both ruff, black formatters not found, skipping formatting diff check.") return False - + diff_lines_count = get_diff_lines_count(diff_changes_stdout) - + if diff_lines_count > max_diff_lines: logger.debug(f"Skipping {filepath}: {diff_lines_count} lines would change (max: {max_diff_lines})") return False return True - + def format_code(formatter_cmds: list[str], path: Path, print_status: bool = True) -> str: # noqa # TODO: Only allow a particular whitelist of formatters here to prevent arbitrary code execution