Skip to content

Commit b5ee6b6

Browse files
committed
feat: Enhance issue reconciliation by introducing resolutionReason field and improving handling of unanchored issues
1 parent 2d389aa commit b5ee6b6

9 files changed

Lines changed: 235 additions & 55 deletions

File tree

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/branch/BranchIssueReconciliationService.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,7 @@ public BranchIssueReconciliationService(
7171
* <p>
7272
* CodeAnalysisIssue records remain immutable historical records.
7373
*/
74-
public void reconcileIssueLineNumbers(String rawDiff, Set<String> changedFiles,
75-
Branch branch) {
74+
public void reconcileIssueLineNumbers(String rawDiff, Set<String> changedFiles, Branch branch) {
7675
if (rawDiff == null || rawDiff.isBlank())
7776
return;
7877

@@ -480,9 +479,15 @@ public boolean processReconciledIssue(Map<String, Object> issueData, Branch bran
480479
return false;
481480

482481
boolean resolved = isMarkedResolved(issueData);
483-
String resolvedDescription = issueData.get("reason") != null
484-
? String.valueOf(issueData.get("reason"))
482+
// Read dedicated resolutionReason field (not 'reason' which is the issue description)
483+
String resolvedDescription = issueData.get("resolutionReason") != null
484+
? String.valueOf(issueData.get("resolutionReason"))
485485
: null;
486+
// Fallback: if AI didn't provide resolutionReason, use 'reason' ONLY if it
487+
// differs from the original issue description (i.e. AI actually wrote a fix explanation)
488+
if (resolvedDescription == null || resolvedDescription.isBlank()) {
489+
resolvedDescription = "Resolved by AI reconciliation";
490+
}
486491

487492
if (!resolved)
488493
return false;
@@ -642,7 +647,8 @@ private void classifyIssuesByContent(List<BranchIssue> fileIssues,
642647
boolean contentFound = false;
643648
String updatedLineHash = null;
644649

645-
// Issues at line <= 1 with no codeSnippet have no reliable anchor
650+
// Unanchored issues (line <= 1, no codeSnippet) have no reliable content
651+
// anchor → must be reconciled via AI, not deterministic tracking
646652
boolean hasNoReliableAnchor = (currentLine == null || currentLine <= 1)
647653
&& (bi.getCodeSnippet() == null || bi.getCodeSnippet().isBlank());
648654
if (hasNoReliableAnchor) {

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/pr/PrFileEnrichmentService.java

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -162,24 +162,46 @@ public PrEnrichmentDataDto enrichPrFiles(
162162
}
163163

164164
/**
165-
* Filter files to only those with supported extensions for parsing.
165+
* Blacklist of binary / non-text file extensions that should be excluded
166+
* from enrichment. Everything else is allowed.
166167
*/
167-
private List<String> filterSupportedFiles(List<String> files, Map<String, Integer> skipReasons) {
168-
Set<String> supportedExtensions = Set.of(
169-
".java", ".py", ".js", ".ts", ".jsx", ".tsx",
170-
".go", ".rs", ".rb", ".php", ".cs", ".cpp", ".c", ".h",
171-
".kt", ".scala", ".swift", ".m", ".mm");
168+
private static final Set<String> EXCLUDED_EXTENSIONS = Set.of(
169+
// Images
170+
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".tiff", ".tif",
171+
// Fonts
172+
".woff", ".woff2", ".ttf", ".otf", ".eot",
173+
// Compiled / bytecode
174+
".class", ".pyc", ".pyo", ".o", ".obj", ".dll", ".so", ".dylib", ".exe",
175+
".war", ".ear", ".nar",
176+
// Archives
177+
".jar", ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar",
178+
// Documents / media
179+
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
180+
".mp3", ".mp4", ".avi", ".mov", ".wav", ".flac", ".ogg", ".webm",
181+
// Data blobs / certs
182+
".bin", ".dat", ".db", ".sqlite", ".sqlite3",
183+
".p12", ".pfx", ".jks", ".keystore", ".der", ".cer",
184+
// Lock files (large, auto-generated, no review value)
185+
".lockb",
186+
// Misc binary
187+
".wasm", ".map", ".min.js", ".min.css"
188+
);
172189

190+
/**
191+
* Filter out known binary / non-text files. Everything not blacklisted is
192+
* allowed through for enrichment and file snapshots.
193+
*/
194+
private List<String> filterSupportedFiles(List<String> files, Map<String, Integer> skipReasons) {
173195
List<String> supported = new ArrayList<>();
174196
for (String file : files) {
175197
String lower = file.toLowerCase();
176-
boolean isSupported = supportedExtensions.stream()
198+
boolean isExcluded = EXCLUDED_EXTENSIONS.stream()
177199
.anyMatch(lower::endsWith);
178200

179-
if (isSupported) {
180-
supported.add(file);
201+
if (isExcluded) {
202+
skipReasons.merge("binary_or_non_text", 1, Integer::sum);
181203
} else {
182-
skipReasons.merge("unsupported_extension", 1, Integer::sum);
204+
supported.add(file);
183205
}
184206
}
185207

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/pr/PrIssueTrackingService.java

Lines changed: 142 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ public TrackingSummary trackPrIteration(
8686
int matched = 0;
8787
int newOnly = 0;
8888
int resolved = 0;
89+
int unanchoredResolved = 0;
90+
int unanchoredPersisting = 0;
8991

9092
for (String filePath : allFiles) {
9193
List<CodeAnalysisIssue> fileNewIssues = newByFile.getOrDefault(filePath, List.of());
@@ -102,14 +104,118 @@ public TrackingSummary trackPrIteration(
102104
continue;
103105
}
104106

107+
// ── Separate unanchored previous issues (line <= 1, no lineHash, no codeSnippet) ──
108+
// These cannot be reliably tracked by content hashing. Instead, match them
109+
// by the original issue ID that the AI preserves in its response.
110+
// If the AI omitted the issue entirely → resolved (AI didn't find it).
111+
// If the AI re-reported it with isResolved=true → resolved.
112+
// If the AI re-reported it with isResolved=false → mark for AI reconciliation.
113+
List<CodeAnalysisIssue> anchoredPrevIssues = new ArrayList<>();
114+
List<CodeAnalysisIssue> unanchoredPrevIssues = new ArrayList<>();
115+
for (CodeAnalysisIssue prev : filePrevIssues) {
116+
if (isUnanchored(prev)) {
117+
unanchoredPrevIssues.add(prev);
118+
} else {
119+
anchoredPrevIssues.add(prev);
120+
}
121+
}
122+
123+
// Handle unanchored previous issues via fingerprint matching.
124+
// Unlike IssueTracker (which would make these "immortal" via Pass 3/4),
125+
// we match by fingerprint but ALSO respect the new issue's isResolved flag
126+
// from the AI's Stage 1 review.
127+
if (!unanchoredPrevIssues.isEmpty()) {
128+
// Build a lookup: fingerprint → list of new issues with that fingerprint
129+
Map<String, List<CodeAnalysisIssue>> newByFingerprint = new LinkedHashMap<>();
130+
for (CodeAnalysisIssue newIssue : fileNewIssues) {
131+
String fp = newIssue.getIssueFingerprint();
132+
if (fp != null) {
133+
newByFingerprint.computeIfAbsent(fp, k -> new ArrayList<>()).add(newIssue);
134+
}
135+
}
136+
137+
Set<CodeAnalysisIssue> unanchoredMatchedNewIssues = new HashSet<>();
138+
for (CodeAnalysisIssue prevIssue : unanchoredPrevIssues) {
139+
String prevFp = prevIssue.getIssueFingerprint();
140+
List<CodeAnalysisIssue> candidates = prevFp != null
141+
? newByFingerprint.getOrDefault(prevFp, List.of())
142+
: List.of();
143+
144+
// Pick the first unmatched candidate (unanchored issues at line 1 are
145+
// identical in terms of fingerprint, so order doesn't matter)
146+
CodeAnalysisIssue matchedNew = null;
147+
for (CodeAnalysisIssue c : candidates) {
148+
if (!unanchoredMatchedNewIssues.contains(c)) {
149+
matchedNew = c;
150+
break;
151+
}
152+
}
153+
154+
if (matchedNew != null) {
155+
// AI re-reported this issue — link them
156+
matchedNew.setTrackedFromIssueId(prevIssue.getId());
157+
matchedNew.setTrackingConfidence("UNANCHORED_FP_MATCH");
158+
unanchoredMatchedNewIssues.add(matchedNew);
159+
160+
if (matchedNew.isResolved()) {
161+
// AI marked it resolved in Stage 1 → trust it
162+
unanchoredResolved++;
163+
log.info("Unanchored issue {} resolved by AI (new issue {}, file={})",
164+
prevIssue.getId(), matchedNew.getId(), filePath);
165+
} else if (prevIssue.isResolved()) {
166+
// Previous was resolved (user dismissed) → carry forward
167+
matchedNew.setResolved(true);
168+
matchedNew.setResolvedDescription(prevIssue.getResolvedDescription());
169+
matchedNew.setResolvedByPr(prevIssue.getResolvedByPr());
170+
matchedNew.setResolvedCommitHash(prevIssue.getResolvedCommitHash());
171+
matchedNew.setResolvedAnalysisId(prevIssue.getResolvedAnalysisId());
172+
matchedNew.setResolvedAt(prevIssue.getResolvedAt());
173+
matchedNew.setResolvedBy(prevIssue.getResolvedBy());
174+
unanchoredResolved++;
175+
} else {
176+
// AI says still present — persists (but can be overridden by
177+
// dedicated AI reconciliation if caller implements it)
178+
unanchoredPersisting++;
179+
}
180+
issueRepository.save(matchedNew);
181+
matched++;
182+
} else {
183+
// AI did NOT re-report this issue → it's resolved
184+
unanchoredResolved++;
185+
resolved++;
186+
log.info("Unanchored issue {} resolved — AI omitted it in new review (file={})",
187+
prevIssue.getId(), filePath);
188+
}
189+
}
190+
191+
// Remove unanchored-matched new issues from the pool before IssueTracker
192+
// so they don't get double-counted or double-matched
193+
fileNewIssues = fileNewIssues.stream()
194+
.filter(ni -> !unanchoredMatchedNewIssues.contains(ni))
195+
.collect(Collectors.toList());
196+
}
197+
198+
// ── Run IssueTracker on anchored issues only ──
199+
if (anchoredPrevIssues.isEmpty() && fileNewIssues.isEmpty()) {
200+
continue;
201+
}
202+
if (anchoredPrevIssues.isEmpty()) {
203+
newOnly += fileNewIssues.size();
204+
continue;
205+
}
206+
if (fileNewIssues.isEmpty()) {
207+
resolved += anchoredPrevIssues.size();
208+
continue;
209+
}
210+
105211
// Wrap issues as Trackables
106212
// RAW = new issues (recomputed against new file content)
107213
List<TrackableIssue> rawTrackables = fileNewIssues.stream()
108214
.map(issue -> recomputeTrackable(issue, newFileContents))
109215
.collect(Collectors.toList());
110216

111217
// BASE = previous issues (with original detection-time hashes)
112-
List<TrackableIssue> baseTrackables = filePrevIssues.stream()
218+
List<TrackableIssue> baseTrackables = anchoredPrevIssues.stream()
113219
.map(TrackableIssue::fromOriginal)
114220
.collect(Collectors.toList());
115221

@@ -125,9 +231,12 @@ public TrackingSummary trackPrIteration(
125231
newIssue.setTrackedFromIssueId(prevIssue.getId());
126232
newIssue.setTrackingConfidence(pair.confidence().name());
127233

128-
// If the previous issue was resolved (e.g. user dismissed it), carry that
129-
// status forward so the same issue doesn't reappear as an annotation.
234+
// Check resolved status from BOTH directions:
235+
// 1. Previous issue was resolved (user dismissed, or previous reconciliation)
236+
// 2. New issue was marked resolved by the AI in Stage 1 review
237+
// (createIssueFromData sets isResolved=true when AI says isResolved: true)
130238
if (prevIssue.isResolved()) {
239+
// Carry forward previous resolution
131240
newIssue.setResolved(true);
132241
newIssue.setResolvedDescription(prevIssue.getResolvedDescription());
133242
newIssue.setResolvedByPr(prevIssue.getResolvedByPr());
@@ -137,6 +246,10 @@ public TrackingSummary trackPrIteration(
137246
newIssue.setResolvedBy(prevIssue.getResolvedBy());
138247
log.info("Carried forward resolved status from issue {} to new issue {} (confidence={})",
139248
prevIssue.getId(), newIssue.getId(), pair.confidence().name());
249+
} else if (newIssue.isResolved()) {
250+
// AI in Stage 1 marked this re-emitted issue as resolved — trust it
251+
log.info("AI marked matched issue as resolved: prev={} → new={} (confidence={})",
252+
prevIssue.getId(), newIssue.getId(), pair.confidence().name());
140253
}
141254

142255
issueRepository.save(newIssue);
@@ -150,11 +263,14 @@ public TrackingSummary trackPrIteration(
150263
resolved += tracking.getUnmatchedBases().size();
151264
}
152265

153-
log.info("PR tracking for analysis {}: {} matched, {} new, {} resolved (previous analysis={})",
154-
newAnalysis.getId(), matched, newOnly, resolved, previousAnalysis.getId());
266+
log.info("PR tracking for analysis {}: {} matched, {} new, {} resolved " +
267+
"(unanchored: {} resolved, {} persisting) (previous analysis={})",
268+
newAnalysis.getId(), matched, newOnly, resolved,
269+
unanchoredResolved, unanchoredPersisting, previousAnalysis.getId());
155270

156271
return new TrackingSummary(matched, resolved, newOnly,
157-
prevIssues.stream().filter(CodeAnalysisIssue::isResolved).count());
272+
prevIssues.stream().filter(CodeAnalysisIssue::isResolved).count(),
273+
unanchoredResolved, unanchoredPersisting);
158274
}
159275

160276
// ── Trackable adapter ────────────────────────────────────────────────
@@ -217,6 +333,18 @@ private TrackableIssue recomputeTrackable(CodeAnalysisIssue issue, Map<String, S
217333
);
218334
}
219335

336+
/**
337+
* An issue is "unanchored" when it has no meaningful code location —
338+
* line is absent or 1, no line hash, and no code snippet.
339+
* These cannot be reliably tracked by content hashing (IssueTracker Pass 3/4
340+
* would match them forever on fingerprint+line alone).
341+
*/
342+
private boolean isUnanchored(CodeAnalysisIssue issue) {
343+
return (issue.getLineNumber() == null || issue.getLineNumber() <= 1)
344+
&& issue.getLineHash() == null
345+
&& (issue.getCodeSnippet() == null || issue.getCodeSnippet().isBlank());
346+
}
347+
220348
private Map<String, List<CodeAnalysisIssue>> groupByFile(List<CodeAnalysisIssue> issues) {
221349
return issues.stream()
222350
.filter(i -> i.getFilePath() != null)
@@ -232,8 +360,15 @@ public record TrackingSummary(
232360
int matchedCount,
233361
int resolvedCount,
234362
int newIssueCount,
235-
long previouslyResolvedCount
363+
long previouslyResolvedCount,
364+
int unanchoredResolvedCount,
365+
int unanchoredPersistingCount
236366
) {
367+
/** Convenience constructor for first iteration (no tracking). */
368+
public TrackingSummary(int matchedCount, int resolvedCount, int newIssueCount, long previouslyResolvedCount) {
369+
this(matchedCount, resolvedCount, newIssueCount, previouslyResolvedCount, 0, 0);
370+
}
371+
237372
public boolean isFirstIteration() {
238373
return matchedCount == 0 && resolvedCount == 0 && previouslyResolvedCount == 0;
239374
}

java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/service/PrFileEnrichmentServiceTest.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -87,31 +87,38 @@ class DisabledAndEmptyTests {
8787
@Nested
8888
@DisplayName("enrichPrFiles() - file filtering")
8989
class FilterTests {
90-
@Test void filtersUnsupportedExtensions() {
90+
@Test void filtersBinaryExtensions() {
9191
PrEnrichmentDataDto result = service.enrichPrFiles(vcsClient, "ws", "repo", "main",
92-
List.of("README.md", "image.png", "data.json", "config.yml"));
92+
List.of("image.png", "photo.jpg", "archive.zip", "lib.dll"));
9393
assertThat(result.stats().filesSkipped()).isEqualTo(4);
94-
assertThat(result.stats().skipReasons()).containsKey("unsupported_extension");
94+
assertThat(result.stats().skipReasons()).containsKey("binary_or_non_text");
9595
}
9696

97-
@Test void keepsSupportedExtensions() throws Exception {
98-
List<String> files = List.of("Main.java", "app.py", "index.ts", "style.css");
97+
@Test void keepsAllTextFiles() throws Exception {
98+
List<String> files = List.of("Main.java", "app.py", "index.ts", "README.md",
99+
"deploy.sh", "notes.txt", "config.yml", "data.json");
99100
Map<String, String> contents = Map.of(
100101
"Main.java", "public class Main {}",
101102
"app.py", "print('hello')",
102-
"index.ts", "export default {}"
103+
"index.ts", "export default {}",
104+
"README.md", "# README",
105+
"deploy.sh", "#!/bin/bash",
106+
"notes.txt", "some notes",
107+
"config.yml", "key: value",
108+
"data.json", "{}"
103109
);
104110
when(vcsClient.getFileContents(eq("ws"), eq("repo"), anyList(), eq("main"), anyInt()))
105111
.thenReturn(contents);
106112

107113
mockWebServer.enqueue(new MockResponse()
108114
.setResponseCode(200)
109115
.addHeader("Content-Type", "application/json")
110-
.setBody("{\"results\":[], \"total_files\":3, \"successful\":3, \"failed\":0}"));
116+
.setBody("{\"results\":[], \"total_files\":8, \"successful\":8, \"failed\":0}"));
111117

112118
PrEnrichmentDataDto result = service.enrichPrFiles(vcsClient, "ws", "repo", "main", files);
113-
assertThat(result.stats().totalFilesRequested()).isEqualTo(4);
114-
assertThat(result.stats().skipReasons()).containsKey("unsupported_extension");
119+
assertThat(result.stats().totalFilesRequested()).isEqualTo(8);
120+
assertThat(result.stats().filesEnriched()).isEqualTo(8);
121+
assertThat(result.stats().skipReasons()).doesNotContainKey("binary_or_non_text");
115122
}
116123
}
117124

0 commit comments

Comments
 (0)