feat: refactor parse_xml_content to parse_patch_blocks for improved patch handling

BrunoV21 · BrunoV21 · commit 6f09f64b8bbc · 2025-07-18T19:31:44.000+01:00
diff --git a/codetide/agents/tide/utils.py b/codetide/agents/tide/utils.py
@@ -1,23 +1,23 @@
 from typing import List, Union
 import re
 
-def parse_xml_content(text: str, tag: str = "diff", multiple: bool = False) -> Union[str, List[str], None]:
+def parse_patch_blocks(text: str, multiple: bool = True) -> Union[str, List[str], None]:
     """
-    Extract content between <tag>...</tag> markers.
-    
+    Extract content between *** Begin Patch and *** End Patch markers (inclusive),
+    ensuring that both markers are at zero indentation (start of line, no leading spaces).
+
     Args:
-        text: Full input text.
-        tag: The tag name to extract content from (default: 'diff').
-        multiple: If True, return all matching blocks. If False, return only the first one.
-    
+        text: Full input text containing one or more patch blocks.
+        multiple: If True, return a list of all patch blocks. If False, return the first match.
+
     Returns:
-        The extracted content as a string (if one block), list of strings (if multiple),
-        or None if no blocks found.
+        A list of all patch blocks if `multiple` is True, the first patch block (str) if False, or None if none are found.
     """
-    pattern = fr"<{tag}>\s*(.*?)\s*</{tag}>"
-    matches = re.findall(pattern, text, re.DOTALL)
+    
+    pattern = r"(?m)^(\*\*\* Begin Patch[\s\S]*?^\*\*\* End Patch)$"
+    matches = re.findall(pattern, text)
 
     if not matches:
         return None
 
-    return matches if multiple else matches[0]
+    return matches if multiple else matches[0]
diff --git a/codetide/mcp/tools/patch_code/__init__.py b/codetide/mcp/tools/patch_code/__init__.py
@@ -4,15 +4,30 @@
 from typing import Dict, Tuple, List, Callable
 import pathlib
 import re
+import os
 
 BREAKLINE_TOKEN = "<n>"
 
+BREAKLINE_PER_FILE_TYPE = {
+    ".md": "\n",
+    ".py": r"\n"
+}
+
 # --------------------------------------------------------------------------- #
 #  User-facing API
 # --------------------------------------------------------------------------- #
 def text_to_patch(text: str, orig: Dict[str, str]) -> Tuple[Patch, int]:
     """High-level function to parse patch text against original file content."""
     lines = text.splitlines()
+    for i, line in enumerate(lines):
+        if line.startswith(("@", "***")):
+            continue
+
+        elif line.startswith("---") or not line.startswith(("+", "-", " ")):
+            lines[i] = f" {line}"
+    
+    # print(f"\n\n{lines[-2:]=}")
+
     if not lines or not Parser._norm(lines[0]).startswith("*** Begin Patch"):
         raise DiffError("Invalid patch text - must start with '*** Begin Patch'.")
     if not Parser._norm(lines[-1]) == "*** End Patch":
@@ -146,8 +161,9 @@ def open_file(path: str) -> str:
 def write_file(path: str, content: str) -> None:
     target = pathlib.Path(path)
     target.parent.mkdir(parents=True, exist_ok=True)
+    _, ext = os.path.splitext(target)
     with target.open("wt", encoding="utf-8", newline="\n") as fh:
-        fh.write(content.replace(BREAKLINE_TOKEN, "\\n"))
+        fh.write(content.replace(BREAKLINE_TOKEN, BREAKLINE_PER_FILE_TYPE.get(ext, r"\n")))
 
 
 def remove_file(path: str) -> None: