Skip to content

Commit ee06331

Browse files
Merge pull request #1224 from codeflash-ai/java-coverage-support
feat: add JaCoCo test coverage support for Java optimization
2 parents 77cddec + f67057d commit ee06331

11 files changed

Lines changed: 1176 additions & 59 deletions

File tree

codeflash/code_utils/config_parser.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
def find_pyproject_toml(config_file: Path | None = None) -> Path:
16-
# Find the pyproject.toml file on the root of the project
16+
# Find the pyproject.toml or codeflash.toml file on the root of the project
1717

1818
if config_file is not None:
1919
config_file = Path(config_file)
@@ -29,15 +29,21 @@ def find_pyproject_toml(config_file: Path | None = None) -> Path:
2929
# see if it was encountered before in search
3030
if cur_path in PYPROJECT_TOML_CACHE:
3131
return PYPROJECT_TOML_CACHE[cur_path]
32-
# map current path to closest file
32+
# map current path to closest file - check both pyproject.toml and codeflash.toml
3333
while dir_path != dir_path.parent:
34+
# First check pyproject.toml (Python projects)
3435
config_file = dir_path / "pyproject.toml"
3536
if config_file.exists():
3637
PYPROJECT_TOML_CACHE[cur_path] = config_file
3738
return config_file
38-
# Search for pyproject.toml in the parent directories
39+
# Then check codeflash.toml (Java/other projects)
40+
config_file = dir_path / "codeflash.toml"
41+
if config_file.exists():
42+
PYPROJECT_TOML_CACHE[cur_path] = config_file
43+
return config_file
44+
# Search in parent directories
3945
dir_path = dir_path.parent
40-
msg = f"Could not find pyproject.toml in the current directory {Path.cwd()} or any of the parent directories. Please create it by running `codeflash init`, or pass the path to pyproject.toml with the --config-file argument."
46+
msg = f"Could not find pyproject.toml or codeflash.toml in the current directory {Path.cwd()} or any of the parent directories. Please create it by running `codeflash init`, or pass the path to the config file with the --config-file argument."
4147

4248
raise ValueError(msg) from None
4349

@@ -123,7 +129,7 @@ def parse_config_file(
123129
if lsp_mode:
124130
# don't fail in lsp mode if codeflash config is not found.
125131
return {}, config_file_path
126-
msg = f"Could not find the 'codeflash' block in the config file {config_file_path}. Please run 'codeflash init' to add Codeflash config in the pyproject.toml config file."
132+
msg = f"Could not find the 'codeflash' block in the config file {config_file_path}. Please run 'codeflash init' to add Codeflash config."
127133
raise ValueError(msg) from e
128134
assert isinstance(config, dict)
129135

codeflash/languages/java/build_tools.py

Lines changed: 239 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414
from dataclasses import dataclass
1515
from enum import Enum
1616
from pathlib import Path
17-
from typing import TYPE_CHECKING, Any
18-
19-
if TYPE_CHECKING:
20-
pass
2117

2218
logger = logging.getLogger(__name__)
2319

@@ -198,23 +194,23 @@ def _extract_java_version_from_pom(root: ET.Element, ns: dict[str, str]) -> str
198194
"""
199195
# Check properties
200196
for prop_name in ("maven.compiler.source", "java.version", "maven.compiler.release"):
201-
for props in [root.find(f"m:properties", ns), root.find("properties")]:
197+
for props in [root.find("m:properties", ns), root.find("properties")]:
202198
if props is not None:
203199
for prop in [props.find(f"m:{prop_name}", ns), props.find(prop_name)]:
204200
if prop is not None and prop.text:
205201
return prop.text
206202

207203
# Check compiler plugin configuration
208-
for build in [root.find(f"m:build", ns), root.find("build")]:
204+
for build in [root.find("m:build", ns), root.find("build")]:
209205
if build is not None:
210-
for plugins in [build.find(f"m:plugins", ns), build.find("plugins")]:
206+
for plugins in [build.find("m:plugins", ns), build.find("plugins")]:
211207
if plugins is not None:
212-
for plugin in plugins.findall(f"m:plugin", ns) + plugins.findall("plugin"):
213-
artifact_id = plugin.find(f"m:artifactId", ns) or plugin.find("artifactId")
208+
for plugin in plugins.findall("m:plugin", ns) + plugins.findall("plugin"):
209+
artifact_id = plugin.find("m:artifactId", ns) or plugin.find("artifactId")
214210
if artifact_id is not None and artifact_id.text == "maven-compiler-plugin":
215-
config = plugin.find(f"m:configuration", ns) or plugin.find("configuration")
211+
config = plugin.find("m:configuration", ns) or plugin.find("configuration")
216212
if config is not None:
217-
source = config.find(f"m:source", ns) or config.find("source")
213+
source = config.find("m:source", ns) or config.find("source")
218214
if source is not None and source.text:
219215
return source.text
220216

@@ -554,9 +550,8 @@ def install_codeflash_runtime(project_root: Path, runtime_jar_path: Path) -> boo
554550
if result.returncode == 0:
555551
logger.info("Successfully installed codeflash-runtime to local Maven repository")
556552
return True
557-
else:
558-
logger.error("Failed to install codeflash-runtime: %s", result.stderr)
559-
return False
553+
logger.error("Failed to install codeflash-runtime: %s", result.stderr)
554+
return False
560555

561556
except Exception as e:
562557
logger.exception("Failed to install codeflash-runtime: %s", e)
@@ -633,6 +628,236 @@ def add_codeflash_dependency_to_pom(pom_path: Path) -> bool:
633628
return False
634629

635630

631+
# Version string written into the <version> element of the jacoco-maven-plugin
# entry that add_jacoco_plugin_to_pom inserts into a pom.xml.
JACOCO_PLUGIN_VERSION = "0.8.11"
632+
633+
634+
def is_jacoco_configured(pom_path: Path) -> bool:
    """Check if JaCoCo plugin is already configured in pom.xml.

    Every ``<plugin>`` element in the document is inspected, so declarations in
    the main build section and in profile build sections are both found.

    Args:
        pom_path: Path to the pom.xml file.

    Returns:
        True if a ``jacoco-maven-plugin`` entry whose groupId is ``org.jacoco``
        (or is omitted) exists anywhere in the pom.xml. False if the file does
        not exist, cannot be parsed as XML, or contains no such plugin.

    """
    if not pom_path.exists():
        return False

    try:
        root = ET.parse(pom_path).getroot()
    except ET.ParseError as e:
        logger.warning("Failed to parse pom.xml for JaCoCo check: %s", e)
        return False

    # ElementTree spells namespaced tags as "{uri}name". Take the namespace from
    # the root element itself instead of assuming POM/4.0.0, so POMs that declare
    # a different model namespace (or none at all) are matched as well.
    ns_prefix = root.tag[: root.tag.index("}") + 1] if root.tag.startswith("{") else ""

    for plugin in root.iter(f"{ns_prefix}plugin"):
        artifact_id = plugin.find(f"{ns_prefix}artifactId")
        # Surrounding whitespace in element text is not significant here.
        if artifact_id is None or (artifact_id.text or "").strip() != "jacoco-maven-plugin":
            continue

        group_id = plugin.find(f"{ns_prefix}groupId")
        # A plugin entry without a groupId is accepted leniently.
        if group_id is None or (group_id.text or "").strip() == "org.jacoco":
            return True

    return False
676+
677+
678+
def _find_build_plugins_close(build_section: str) -> int:
    """Return the offset of the ``</plugins>`` closing the build's own ``<plugins>`` block.

    A ``<plugins>`` element nested in ``<pluginManagement>`` is skipped: plugins
    declared there are only templates and do not run unless they are also listed
    in the build's own ``<plugins>`` block.

    Args:
        build_section: Text spanning one ``<build>`` element.

    Returns:
        Offset (relative to ``build_section``) of the matching ``</plugins>``,
        or -1 when the build has no ``<plugins>`` block of its own.

    """
    management_start = build_section.find("<pluginManagement>")
    management_end = build_section.find("</pluginManagement>")

    plugins_start = build_section.find("<plugins>")
    if management_start != -1 and management_start < plugins_start < management_end:
        # The first <plugins> belongs to pluginManagement; look past that element.
        plugins_start = build_section.find("<plugins>", management_end)

    if plugins_start == -1:
        return -1
    return _find_closing_tag(build_section, plugins_start, "plugins")


def add_jacoco_plugin_to_pom(pom_path: Path) -> bool:
    """Add JaCoCo Maven plugin to pom.xml for coverage collection.

    Uses string manipulation to preserve the original XML format and avoid
    namespace prefix issues that ElementTree causes.

    The plugin is placed, in order of preference:
      1. before the ``</plugins>`` of the main ``<build>`` element's own
         ``<plugins>`` block (never inside ``<pluginManagement>``);
      2. in a new ``<plugins>`` block just before the main ``</build>``;
      3. in a new ``<build>`` block just before ``</project>``.

    Args:
        pom_path: Path to the pom.xml file.

    Returns:
        True if plugin was added or already present, False on error.

    """
    if not pom_path.exists():
        logger.error("pom.xml not found: %s", pom_path)
        return False

    # Check if already configured
    if is_jacoco_configured(pom_path):
        logger.info("JaCoCo plugin already configured in pom.xml")
        return True

    try:
        content = pom_path.read_text(encoding="utf-8")

        # Basic validation that it's a Maven pom.xml
        if "</project>" not in content:
            logger.error("Invalid pom.xml: no closing </project> tag found")
            return False

        # JaCoCo plugin XML to insert
        jacoco_plugin = """
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>{version}</version>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>verify</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>""".format(version=JACOCO_PLUGIN_VERSION)

        # Find the main <build> section, i.e. the one NOT inside <profiles>:
        # either before <profiles>, after </profiles>, or anywhere when the
        # file has no profiles section at all.
        profiles_start = content.find("<profiles>")
        profiles_end = content.find("</profiles>")

        if profiles_start == -1:
            # No profiles, any <build> is the main one
            build_start = content.find("<build>")
            build_end = content.find("</build>")
        else:
            # Has profiles - find <build> outside of profiles
            build_before_profiles = content[:profiles_start].rfind("<build>")
            build_after_profiles = content.find("<build>", profiles_end) if profiles_end != -1 else -1

            if build_before_profiles != -1:
                build_start = build_before_profiles
            else:
                build_start = build_after_profiles

            # Match the closing tag while accounting for nested <build> elements.
            build_end = _find_closing_tag(content, build_start, "build") if build_start != -1 else -1

        if build_start != -1 and build_end != -1:
            # Found main build section, find its own plugins block
            build_section = content[build_start : build_end + len("</build>")]
            plugins_end_in_build = _find_build_plugins_close(build_section)

            if plugins_end_in_build != -1:
                # Insert before </plugins> within the main build section
                absolute_plugins_end = build_start + plugins_end_in_build
                content = content[:absolute_plugins_end] + jacoco_plugin + "\n " + content[absolute_plugins_end:]
            else:
                # No plugins section in main build, add one before </build>
                plugins_section = f"<plugins>{jacoco_plugin}\n </plugins>\n "
                content = content[:build_end] + plugins_section + content[build_end:]
        else:
            # No main build section found, add one before </project>
            project_end = content.rfind("</project>")
            build_section = f"""
<build>
<plugins>{jacoco_plugin}
</plugins>
</build>
"""
            content = content[:project_end] + build_section + content[project_end:]

        pom_path.write_text(content, encoding="utf-8")
        logger.info("Added JaCoCo plugin to pom.xml")
        return True

    except Exception as e:
        # Best-effort operation: report the failure through the return value
        # rather than letting it propagate to the caller.
        logger.exception("Failed to add JaCoCo plugin to pom.xml: %s", e)
        return False
798+
799+
800+
def _find_closing_tag(content: str, start_pos: int, tag_name: str) -> int:
    """Find the position of the closing tag that matches the opening tag at start_pos.

    Handles nested tags of the same name. Opening tags may carry attributes,
    and self-closing tags (``<tag/>`` or ``<tag />``) are ignored because they
    neither open nor close a nesting level.

    Args:
        content: The XML content.
        start_pos: Position of the opening tag.
        tag_name: Name of the tag.

    Returns:
        Position of the closing tag, or -1 if not found.

    """
    import re  # imported locally so the block does not depend on the module's import list

    # group(1) == "/" -> closing tag; group(2) == "/" -> self-closing tag.
    # The name must be followed by whitespace, "/" or ">" so that a longer tag
    # name sharing the same prefix (e.g. "plugins" vs "plugin") never matches.
    tag_pattern = re.compile(rf"<(/?){re.escape(tag_name)}(?:\s[^>]*?)?(/?)>")

    depth = 1  # the opening tag at start_pos is already open
    # Begin scanning just past "<tag_name" so the opening tag is not counted again.
    for match in tag_pattern.finditer(content, start_pos + len(tag_name) + 1):
        is_closing, is_self_closing = match.group(1), match.group(2)
        if is_closing:
            depth -= 1
            if depth == 0:
                return match.start()
        elif not is_self_closing:
            depth += 1

    return -1
846+
847+
848+
def get_jacoco_xml_path(project_root: Path) -> Path:
    """Build the location where the JaCoCo XML report is expected.

    Args:
        project_root: Root directory of the Maven project.

    Returns:
        ``<project_root>/target/site/jacoco/jacoco.xml``. The path is only
        computed; nothing checks that the file exists.

    """
    report_parts = ("target", "site", "jacoco", "jacoco.xml")
    return project_root.joinpath(*report_parts)
859+
860+
636861
def find_test_root(project_root: Path) -> Path | None:
637862
"""Find the test root directory for a Java project.
638863

codeflash/languages/java/instrumentation.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,8 @@ def instrument_generated_java_test(
652652
653653
"""
654654
# Extract class name from the test code
655-
class_match = re.search(r'\bclass\s+(\w+)', test_code)
655+
# Use pattern that starts at beginning of line to avoid matching words in comments
656+
class_match = re.search(r'^(?:public\s+)?class\s+(\w+)', test_code, re.MULTILINE)
656657
if not class_match:
657658
logger.warning("Could not find class name in generated test")
658659
return test_code

0 commit comments

Comments
 (0)