Skip to content

Commit 9979904

Browse files
authored
Merge pull request #171 from rostilos/1.5.5-rc
feat: Enhance issue reconciliation by adding scope start and end line tracking in remap results
2 parents e8ddf5b + 7cb79a9 commit 9979904

6 files changed

Lines changed: 164 additions & 31 deletions

File tree

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/IssueReconciliationEngine.java

Lines changed: 78 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@ public class IssueReconciliationEngine {
4848
public record LineRemapResult(
4949
ReconcilableIssue issue,
5050
int oldLine,
51-
int newLine
51+
int newLine,
52+
/** Remapped scope start line (null if original was null or not positive). */
53+
Integer newScopeStartLine,
54+
/** Remapped scope end line (null if original was null or not positive). */
55+
Integer newEndLineNumber
5256
) {
5357
public boolean changed() {
5458
return oldLine != newLine;
@@ -62,7 +66,11 @@ public record SnippetVerificationResult(
6266
ReconcilableIssue issue,
6367
int correctedLine,
6468
String correctedLineHash,
65-
String correctedContextHash
69+
String correctedContextHash,
70+
/** Scope start line resolved from AST at the corrected position (null if no AST or no scope could be resolved). */
71+
Integer correctedScopeStartLine,
72+
/** Scope end line resolved from AST at the corrected position (null if no AST or no scope could be resolved). */
73+
Integer correctedEndLineNumber
6674
) {}
6775

6876
/**
@@ -86,7 +94,11 @@ public record ContentClassification(
8694
/** Updated line number (set for CONFIRMED issues, null otherwise). */
8795
Integer updatedLine,
8896
/** Updated line hash (set for CONFIRMED issues, null otherwise). */
89-
String updatedLineHash
97+
String updatedLineHash,
98+
/** Updated scope start line resolved from AST (null if no AST or not CONFIRMED). */
99+
Integer updatedScopeStartLine,
100+
/** Updated scope end line resolved from AST (null if no AST or not CONFIRMED). */
101+
Integer updatedEndLineNumber
90102
) {}
91103

92104
/**
@@ -134,7 +146,19 @@ public List<LineRemapResult> remapLinesFromDiff(
134146

135147
int newLine = DiffParsingUtils.mapLineNumber(lineNum, hunks, fileDiff);
136148
if (newLine != lineNum) {
137-
results.add(new LineRemapResult(issue, lineNum, newLine));
149+
// Apply same diff remapping to scope boundaries
150+
Integer newScopeStart = null;
151+
Integer newEndLine = null;
152+
if (issue.getScopeStartLine() != null && issue.getScopeStartLine() > 0) {
153+
newScopeStart = DiffParsingUtils.mapLineNumber(
154+
issue.getScopeStartLine(), hunks, fileDiff);
155+
}
156+
if (issue.getEndLineNumber() != null && issue.getEndLineNumber() > 0) {
157+
newEndLine = DiffParsingUtils.mapLineNumber(
158+
issue.getEndLineNumber(), hunks, fileDiff);
159+
}
160+
results.add(new LineRemapResult(issue, lineNum, newLine,
161+
newScopeStart, newEndLine));
138162
}
139163
}
140164
return results;
@@ -214,7 +238,8 @@ public List<SnippetVerificationResult> verifySnippetAnchors(
214238
bestFoundLine,
215239
lineHashes.getHashForLine(bestFoundLine),
216240
computeContextHash(lineHashes, bestFoundLine,
217-
issue.getEndLineNumber(), issue.getCodeSnippet())
241+
issue.getEndLineNumber(), issue.getCodeSnippet()),
242+
null, null // No AST — scope boundaries not re-resolved
218243
));
219244
}
220245
}
@@ -252,7 +277,7 @@ public List<ContentClassification> classifyByContent(
252277
// Route ALL issues to AI instead of risking false RESOLVED classification.
253278
if (currentHashes.getLineCount() == 0) {
254279
for (ReconcilableIssue issue : issues) {
255-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
280+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
256281
}
257282
return results;
258283
}
@@ -263,15 +288,15 @@ public List<ContentClassification> classifyByContent(
263288

264289
// ── FILE-scope issues: always AI ──
265290
if (scope == IssueScope.FILE) {
266-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
291+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
267292
continue;
268293
}
269294

270295
// ── Unanchored issues: always AI ──
271296
boolean hasNoReliableAnchor = (currentLine == null || currentLine <= 1)
272297
&& (issue.getCodeSnippet() == null || issue.getCodeSnippet().isBlank());
273298
if (hasNoReliableAnchor) {
274-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
299+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
275300
continue;
276301
}
277302

@@ -312,7 +337,7 @@ public List<ContentClassification> classifyByContent(
312337
&& matchedSnippetLines < totalNonBlankSnippetLines) {
313338
// Some but not all snippet lines found — partial match
314339
// Treat as NEEDS_AI for compound issues
315-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
340+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
316341
continue;
317342
}
318343

@@ -353,13 +378,15 @@ public List<ContentClassification> classifyByContent(
353378
"anchor line intact but body modified, sending to AI.",
354379
scope, updatedLine);
355380
results.add(new ContentClassification(
356-
issue, Classification.NEEDS_AI, updatedLine, updatedLineHash));
381+
issue, Classification.NEEDS_AI, updatedLine, updatedLineHash, null, null));
357382
continue;
358383
}
359384
}
360385

386+
// Base method (no AST) — cannot re-resolve scope boundaries.
387+
// Scope start/end stay at their current values (caller preserves them).
361388
results.add(new ContentClassification(
362-
issue, Classification.CONFIRMED, updatedLine, updatedLineHash));
389+
issue, Classification.CONFIRMED, updatedLine, updatedLineHash, null, null));
363390
} else {
364391
// Content anchor not found — route to AI instead of auto-resolving.
365392
// The anchor (codeSnippet / lineHash) may be stale or imprecise:
@@ -373,7 +400,7 @@ public List<ContentClassification> classifyByContent(
373400
issue.getCodeSnippet() != null && !issue.getCodeSnippet().isBlank(),
374401
issue.getLineHash() != null);
375402
results.add(new ContentClassification(
376-
issue, Classification.NEEDS_AI, null, null));
403+
issue, Classification.NEEDS_AI, null, null, null, null));
377404
}
378405
}
379406

@@ -532,15 +559,15 @@ public List<ContentClassification> classifyByContent(
532559

533560
// ── FILE-scope issues: always AI ──
534561
if (scope == IssueScope.FILE) {
535-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
562+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
536563
continue;
537564
}
538565

539566
// ── Unanchored issues: always AI ──
540567
boolean hasNoReliableAnchor = (currentLine == null || currentLine <= 1)
541568
&& (issue.getCodeSnippet() == null || issue.getCodeSnippet().isBlank());
542569
if (hasNoReliableAnchor) {
543-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
570+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
544571
continue;
545572
}
546573

@@ -579,7 +606,7 @@ public List<ContentClassification> classifyByContent(
579606
if ((scope == IssueScope.BLOCK || scope == IssueScope.FUNCTION)
580607
&& totalNonBlankSnippetLines > 1
581608
&& matchedSnippetLines < totalNonBlankSnippetLines) {
582-
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null));
609+
results.add(new ContentClassification(issue, Classification.NEEDS_AI, null, null, null, null));
583610
continue;
584611
}
585612

@@ -616,13 +643,29 @@ public List<ContentClassification> classifyByContent(
616643
"anchor intact but scope body modified, sending to AI.",
617644
scope, updatedLine);
618645
results.add(new ContentClassification(
619-
issue, Classification.NEEDS_AI, updatedLine, updatedLineHash));
646+
issue, Classification.NEEDS_AI, updatedLine, updatedLineHash, null, null));
620647
continue;
621648
}
622649
}
623650

651+
// ── Resolve current scope boundaries from AST ──
652+
Integer updatedScopeStart = null;
653+
Integer updatedEndLine = null;
654+
try {
655+
Optional<ScopeInfo> scopeOpt = scopeResolver.innermostScopeAt(parsedTree, updatedLine);
656+
if (scopeOpt.isPresent()) {
657+
ScopeInfo si = scopeOpt.get();
658+
updatedScopeStart = si.startLine();
659+
updatedEndLine = si.endLine();
660+
}
661+
} catch (Exception e) {
662+
log.debug("AST scope resolution failed at line {} for CONFIRMED issue: {}",
663+
updatedLine, e.getMessage());
664+
}
665+
624666
results.add(new ContentClassification(
625-
issue, Classification.CONFIRMED, updatedLine, updatedLineHash));
667+
issue, Classification.CONFIRMED, updatedLine, updatedLineHash,
668+
updatedScopeStart, updatedEndLine));
626669
} else {
627670
// Content anchor not found — route to AI instead of auto-resolving.
628671
// Same reasoning as base classifyByContent: anchors can be stale/null.
@@ -632,7 +675,7 @@ public List<ContentClassification> classifyByContent(
632675
issue.getCodeSnippet() != null && !issue.getCodeSnippet().isBlank(),
633676
issue.getLineHash() != null);
634677
results.add(new ContentClassification(
635-
issue, Classification.NEEDS_AI, null, null));
678+
issue, Classification.NEEDS_AI, null, null, null, null));
636679
}
637680
}
638681

@@ -740,10 +783,26 @@ public List<SnippetVerificationResult> verifySnippetAnchors(
740783
String contextHash = computeContextHashFromAst(
741784
lineHashes, bestFoundLine, parsedTree, scopeResolver,
742785
issue.getEndLineNumber(), issue.getCodeSnippet());
786+
787+
// Resolve current scope boundaries from AST at the corrected line
788+
Integer correctedScopeStart = null;
789+
Integer correctedEndLine = null;
790+
try {
791+
Optional<ScopeInfo> scopeOpt = scopeResolver.innermostScopeAt(parsedTree, bestFoundLine);
792+
if (scopeOpt.isPresent()) {
793+
ScopeInfo si = scopeOpt.get();
794+
correctedScopeStart = si.startLine();
795+
correctedEndLine = si.endLine();
796+
}
797+
} catch (Exception e) {
798+
log.debug("AST scope resolution failed at corrected line {}: {}",
799+
bestFoundLine, e.getMessage());
800+
}
801+
743802
results.add(new SnippetVerificationResult(
744803
issue, bestFoundLine,
745804
lineHashes.getHashForLine(bestFoundLine),
746-
contextHash));
805+
contextHash, correctedScopeStart, correctedEndLine));
747806
}
748807
}
749808

java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/service/branch/BranchIssueReconciliationService.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ public void reconcileIssueLineNumbers(String rawDiff, Set<String> changedFiles,
104104
for (LineRemapResult remap : remaps) {
105105
BranchIssue bi = (BranchIssue) remap.issue();
106106
bi.setCurrentLineNumber(remap.newLine());
107+
if (remap.newScopeStartLine() != null) {
108+
bi.setCurrentScopeStartLine(remap.newScopeStartLine());
109+
}
110+
if (remap.newEndLineNumber() != null) {
111+
bi.setCurrentEndLineNumber(remap.newEndLineNumber());
112+
}
107113
branchIssueRepository.save(bi);
108114
}
109115

@@ -163,6 +169,12 @@ public void verifyIssueLineNumbersWithSnippets(Set<String> changedFiles,
163169
bi.setCurrentLineNumber(svr.correctedLine());
164170
bi.setCurrentLineHash(svr.correctedLineHash());
165171
bi.setLineHashContext(svr.correctedContextHash());
172+
if (svr.correctedScopeStartLine() != null) {
173+
bi.setCurrentScopeStartLine(svr.correctedScopeStartLine());
174+
}
175+
if (svr.correctedEndLineNumber() != null) {
176+
bi.setCurrentEndLineNumber(svr.correctedEndLineNumber());
177+
}
166178
branchIssueRepository.save(bi);
167179
}
168180

@@ -702,6 +714,12 @@ private void classifyIssuesByContent(List<BranchIssue> fileIssues,
702714
case CONFIRMED -> {
703715
bi.setCurrentLineNumber(cc.updatedLine());
704716
bi.setCurrentLineHash(cc.updatedLineHash());
717+
if (cc.updatedScopeStartLine() != null) {
718+
bi.setCurrentScopeStartLine(cc.updatedScopeStartLine());
719+
}
720+
if (cc.updatedEndLineNumber() != null) {
721+
bi.setCurrentEndLineNumber(cc.updatedEndLineNumber());
722+
}
705723
bi.setLastVerifiedCommit(request.getCommitHash());
706724
bi.setTrackingConfidence(TrackingConfidence.EXACT);
707725
branchIssueRepository.save(bi);

java-ecosystem/libs/analysis-engine/src/test/java/org/rostilos/codecrow/analysisengine/service/IssueReconciliationEngineTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -519,8 +519,8 @@ void verifySnippetAnchors_nullAst_shouldFallBackToBase() {
519519

520520
@Test
521521
void lineRemapResult_changed_shouldCompareCorrectly() {
522-
var changed = new IssueReconciliationEngine.LineRemapResult(issue(5, "x"), 5, 10);
523-
var unchanged = new IssueReconciliationEngine.LineRemapResult(issue(5, "x"), 5, 5);
522+
var changed = new IssueReconciliationEngine.LineRemapResult(issue(5, "x"), 5, 10, null, null);
523+
var unchanged = new IssueReconciliationEngine.LineRemapResult(issue(5, "x"), 5, 5, null, null);
524524
assertThat(changed.changed()).isTrue();
525525
assertThat(unchanged.changed()).isFalse();
526526
}

java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/service/CodeAnalysisService.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,17 @@ private CodeAnalysisIssue createIssueFromData(
515515
Map<String, String> fileContents
516516
) {
517517
try {
518+
// Sanitize all string values in the AI response map to strip null bytes
519+
// that PostgreSQL rejects ("invalid byte sequence for encoding UTF8: 0x00").
520+
for (Map.Entry<String, Object> entry : issueData.entrySet()) {
521+
if (entry.getValue() instanceof String s) {
522+
String sanitized = sanitizeForDb(s);
523+
if (sanitized != s) { // identity check — only replace if changed
524+
entry.setValue(sanitized);
525+
}
526+
}
527+
}
528+
518529
CodeAnalysisIssue issue = new CodeAnalysisIssue();
519530

520531
issue.setVcsAuthorId(vcsAuthorId);
@@ -800,6 +811,18 @@ private CodeAnalysisIssue createIssueFromData(Map<String, Object> issueData, Str
800811
return createIssueFromData(issueData, issueKey, vcsAuthorId, vcsAuthorUsername, null, null, null, Collections.emptyMap());
801812
}
802813

814+
/**
815+
* Strips null bytes (0x00), which PostgreSQL rejects in text columns; returns null for null input.
816+
* LLM responses occasionally contain embedded null bytes which cause
817+
* "invalid byte sequence for encoding UTF8: 0x00" errors.
818+
*/
819+
private static String sanitizeForDb(String input) {
820+
if (input == null) return null;
821+
// Only the null character (0x00) is removed here; every other character,
822+
// including tab, newline, carriage return and other C0 controls, is kept as-is.
823+
return input.replace("\u0000", "");
824+
}
825+
803826
public CodeAnalysis createAnalysis(Project project, AnalysisType analysisType) {
804827
CodeAnalysis analysis = new CodeAnalysis();
805828
analysis.setProject(project);

java-ecosystem/services/web-server/src/main/java/org/rostilos/codecrow/webserver/analysis/dto/response/FileViewResponse.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ public record InlineIssue(
3838
/** Tracking lineage: which previous issue this was tracked from (null for first iteration). */
3939
Long trackedFromIssueId,
4040
/** Tracking confidence: EXACT, SHIFTED, EDITED, WEAK, or null. */
41-
String trackingConfidence
41+
String trackingConfidence,
42+
/** Issue scope: LINE, BLOCK, FUNCTION, FILE, or null for legacy issues. */
43+
String issueScope,
44+
/** End line number for scope highlighting (null for LINE-scoped issues). */
45+
Integer endLineNumber,
46+
/** Scope start line for scope highlighting (null for LINE-scoped issues). */
47+
Integer scopeStartLine
4248
) {}
4349
}

0 commit comments

Comments
 (0)