Skip to content

Commit 54182c7

Browse files
alpsla and claude committed
SESSION 113: Add deduplication monitoring
- Track raw vs unique issue counts for PR and base branches
- Log duplicates filtered during categorization
- Display summary: "332 raw → 299 unique (33 duplicates removed)"
- Helps verify deduplication is working correctly

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 56c1151 commit 54182c7

1 file changed

Lines changed: 33 additions & 7 deletions

File tree

packages/agents/src/two-branch/api/v9-analysis-service.ts

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -644,6 +644,16 @@ export class V9AnalysisService {
644644
): EnrichedIssue[] {
645645
console.log(`📊 Step 3: Issue Categorization\n`);
646646

647+
// SESSION 113: Deduplication monitoring
648+
const dedupeStats = {
649+
prRawCount: prIssues.length,
650+
baseRawCount: baseIssues.length,
651+
prDuplicatesFiltered: 0,
652+
baseDuplicatesFiltered: 0,
653+
prUniqueCount: 0,
654+
baseUniqueCount: 0
655+
};
656+
647657
// SESSION 113 FIX: Use fuzzy matching with line-shift tolerance
648658
// When code is added/removed, line numbers shift but issues are still the same
649659
const LINE_SHIFT_THRESHOLD = 15; // Allow up to 15 lines of shift
@@ -715,7 +725,10 @@ export class V9AnalysisService {
715725
// Pass 1: Categorize PR issues
716726
prIssues.forEach(issue => {
717727
const exactSig = getExactSig(issue);
718-
if (processedPrSigs.has(exactSig)) return;
728+
if (processedPrSigs.has(exactSig)) {
729+
dedupeStats.prDuplicatesFiltered++;
730+
return;
731+
}
719732
processedPrSigs.add(exactSig);
720733

721734
const normalizedFile = normalizePath(issue.file);
@@ -738,11 +751,16 @@ export class V9AnalysisService {
738751
});
739752
});
740753

754+
dedupeStats.prUniqueCount = processedPrSigs.size;
755+
741756
// Pass 2: Find RESOLVED issues (in base but not in PR)
742757
const processedBaseSigs = new Set<string>();
743758
baseIssues.forEach(issue => {
744759
const exactSig = getExactSig(issue);
745-
if (processedBaseSigs.has(exactSig)) return;
760+
if (processedBaseSigs.has(exactSig)) {
761+
dedupeStats.baseDuplicatesFiltered++;
762+
return;
763+
}
746764
processedBaseSigs.add(exactSig);
747765

748766
const normalizedFile = normalizePath(issue.file);
@@ -760,18 +778,26 @@ export class V9AnalysisService {
760778
}
761779
});
762780

781+
dedupeStats.baseUniqueCount = processedBaseSigs.size;
782+
763783
const counts = {
764784
NEW: categorizedIssues.filter(i => i.category === 'NEW').length,
765785
EXISTING_MODIFIED: categorizedIssues.filter(i => i.category === 'EXISTING_MODIFIED').length,
766786
RESOLVED: categorizedIssues.filter(i => i.category === 'RESOLVED').length,
767787
EXISTING_REST: categorizedIssues.filter(i => i.category === 'EXISTING_REST').length
768788
};
769789

770-
console.log(` NEW: ${counts.NEW}`);
771-
console.log(` EXISTING_MODIFIED: ${counts.EXISTING_MODIFIED}`);
772-
console.log(` RESOLVED: ${counts.RESOLVED}`);
773-
console.log(` EXISTING_REST: ${counts.EXISTING_REST}`);
774-
console.log(` (Using line-shift tolerance of ${LINE_SHIFT_THRESHOLD} lines)\n`);
790+
// SESSION 113: Log deduplication stats
791+
console.log(` 📊 Deduplication Summary:`);
792+
console.log(` PR Branch: ${dedupeStats.prRawCount} raw → ${dedupeStats.prUniqueCount} unique (${dedupeStats.prDuplicatesFiltered} duplicates removed)`);
793+
console.log(` Base Branch: ${dedupeStats.baseRawCount} raw → ${dedupeStats.baseUniqueCount} unique (${dedupeStats.baseDuplicatesFiltered} duplicates removed)`);
794+
console.log(` 📋 Categorization:`);
795+
console.log(` NEW: ${counts.NEW}`);
796+
console.log(` EXISTING_MODIFIED: ${counts.EXISTING_MODIFIED}`);
797+
console.log(` RESOLVED: ${counts.RESOLVED}`);
798+
console.log(` EXISTING_REST: ${counts.EXISTING_REST}`);
799+
console.log(` TOTAL: ${categorizedIssues.length}`);
800+
console.log(` ⚙️ Line-shift tolerance: ±${LINE_SHIFT_THRESHOLD} lines\n`);
775801

776802
return categorizedIssues;
777803
}

0 commit comments

Comments
 (0)