1111class DiffGenerator :
1212 """
1313 A class for generating custom diffs between two pieces of content.
14+ It enhances the standard unified diff by adding function/class context to hunk headers,
15+ similar to `git diff`, in a fail-safe manner.
1416 """
1517
# Pre-compiled regexes that recognise function/class definition lines.
# This is the core mechanism that mimics Git's `xfuncname` feature: a line is
# treated as "context" as soon as ANY pattern in this list matches it.
# Only the truthiness of `pattern.search(line)` is used by the lookup helper,
# so the capture groups are kept purely for backward compatibility.
_FUNC_CONTEXT_PATTERNS = [
    # Python
    re.compile(r'^\s*(def|class)\s+.*', re.IGNORECASE),
    # Java, JS, TS, PHP, C#.
    # The trailing \b stops keywords from matching as mere prefixes of longer
    # identifiers (e.g. `finally:`, `private_key = ...`, `classes = []`,
    # `enumerate(...)`). `@` (annotations/decorators) is not a word character,
    # so it is matched separately without a boundary.
    re.compile(
        r'^\s*(@|(?:public|private|protected|static|final|native|synchronized'
        r'|abstract|transient|volatile|strictfp|async|function|class|interface'
        r'|enum|implements|extends)\b)'
    ),
    # Go, Rust
    re.compile(r'^\s*(func|fn|impl|trait|struct|enum|mod)\s+.*', re.IGNORECASE),
    # Ruby
    re.compile(r'^\s*(def|class|module)\s+.*', re.IGNORECASE),
    # C, C++ style function definitions.
    # The negative lookahead rejects control statements such as `if (x) {` or
    # `while (x) {`, which have the same "name (...) {" shape as a definition.
    re.compile(
        r'^\s*(?!(?:if|else|for|while|switch|catch|do|return)\b)'
        r'([a-zA-Z_][a-zA-Z0-9_]*\s+)*[a-zA-Z_][a-zA-Z0-9_]*\s*\(.*\)\s*\{'
    ),
    # Perl
    re.compile(r'^sub\s+.*'),
]
29+
@staticmethod
def _find_context(line_index: int, lines: List[str]) -> str:
    """
    Search upwards from a given line index to find the nearest function/class context.

    Args:
        line_index (int): The 0-based index to start searching upwards from.
            An index past the end of ``lines`` is clamped to the last line;
            a negative index results in no search at all.
        lines (List[str]): The content of the file, as a list of lines.

    Returns:
        str: The first line at or above ``line_index`` that matches any of the
            class-level definition patterns, stripped of surrounding whitespace,
            or an empty string if no line matches (or ``lines`` is empty).
    """
    # Clamp the starting point so an out-of-range index cannot raise IndexError.
    # This matters for an empty base file: its hunk header starts at line 0,
    # which the caller maps to index 0 even though there is no such line.
    start = min(line_index, len(lines) - 1)

    # Walk from the starting line back towards the top of the file; the first
    # hit is therefore the definition nearest to the change.
    for i in range(start, -1, -1):
        candidate = lines[i]
        if any(
            pattern.search(candidate)
            for pattern in DiffGenerator._FUNC_CONTEXT_PATTERNS
        ):
            return candidate.strip()

    return ""  # No enclosing definition was found.
50+
1651 @staticmethod
1752 def generate_custom_diff (base_content : str , head_content : str , context_lines : int ) -> str :
1853 """
19- Generate a custom diff between two pieces of content with specified context lines.
54+ Generate a custom diff between two pieces of content with specified context lines,
55+ and automatically add function/class context to hunk headers, similar to `git diff`.
56+ This method is designed to be fail-safe; if context addition fails, it returns the standard diff.
2057
2158 Args:
2259 base_content (str): The original content.
2360 head_content (str): The new content to compare against the base.
2461 context_lines (int): The number of context lines to include in the diff.
2562
2663 Returns:
27- str: A string representation of the unified diff.
64+ str: The unified diff as a single string, with function/class context appended to hunk headers when it can be found.
2865
2966 Raises:
3067 ValueError: If context_lines is negative.
@@ -40,15 +77,69 @@ def generate_custom_diff(base_content: str, head_content: str, context_lines: in
4077 # File is deleted
4178 return "" .join (f"- { line } \n " for line in base_content .splitlines ())
4279
80+ # Use empty strings for None content to ensure difflib handles them correctly
81+ # as file additions or deletions. This is more robust and aligns with difflib's expectations.
82+ base_content = base_content or ""
83+ head_content = head_content or ""
84+
4385 base_lines : List [str ] = base_content .splitlines ()
4486 head_lines : List [str ] = head_content .splitlines ()
4587
88+ # Generate the standard unified diff. This part is considered stable.
89+ diff : List [str ] = list (difflib .unified_diff (
90+ base_lines ,
91+ head_lines ,
92+ n = context_lines ,
93+ lineterm = ''
94+ ))
95+
96+ if not diff :
97+ return "" # No differences found, return early.
98+
99+ # --- Start of the fail-safe enhancement logic ---
100+ # This entire block attempts to add context to hunk headers.
101+ # If any exception occurs here, we catch it and return the original, un-enhanced diff.
102+ # This ensures the function is always reliable (Pareto improvement).
46103 try :
47- diff : List [str ] = list (difflib .unified_diff (base_lines , head_lines , n = context_lines , lineterm = '' ))
48- return '\n ' .join (diff )
104+ enhanced_diff = []
105+ # Regex to parse the original line number from a hunk header.
106+ # e.g., from "@@ -35,7 +35,7 @@" it captures "35".
107+ hunk_header_re = re .compile (r'^@@ -(\d+)(?:,\d+)? .*' )
108+
109+ for line in diff :
110+ match = hunk_header_re .match (line )
111+ if match :
112+ # This is a hunk header line.
113+ # The line number from the regex is 1-based.
114+ start_line_num = int (match .group (1 ))
115+
116+ # The index is 0-based, so we subtract 1.
117+ # The search starts at the first line of the hunk in the base file (which may be a leading context line) and moves upwards.
118+ context_line_index = max (0 , start_line_num - 1 )
119+ context = DiffGenerator ._find_context (context_line_index , base_lines )
120+
121+ if context :
122+ # If context was found, append it to the hunk header.
123+ enhanced_diff .append (f"{ line } { context } " )
124+ else :
125+ # Otherwise, use the original hunk header.
126+ enhanced_diff .append (line )
127+ else :
128+ # This is not a hunk header, just a regular diff line (+, -, ' ').
129+ enhanced_diff .append (line )
130+
131+ # If the enhancement process completes successfully, return the result.
132+ return '\n ' .join (enhanced_diff )
133+
49134 except Exception as e :
50- logger .exception (f"Error generating diff: { str (e )} " )
51- return ""
135+ # If any error occurred during the enhancement, log a warning and fall back.
136+ logger .warning (
137+ f"Could not add hunk header context due to an unexpected error: { str (e )} . "
138+ "Falling back to standard diff output."
139+ )
140+ # --- Fallback mechanism ---
141+ # Return the original, unmodified diff generated by difflib.
142+ return '\n ' .join (diff )
52143
53144
54145class DataAnonymizer :
0 commit comments