Skip to content

Commit d91b3fb

Browse files
authored
Merge pull request #52 from alpsla/fix/report-quality-4-bugs
Fix/report quality 4 bugs
2 parents bfef26b + f28f0eb commit d91b3fb

7 files changed

Lines changed: 121 additions & 147 deletions

File tree

packages/agents/src/two-branch/agents/specialized-agents.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,12 @@ export class SecurityAgent extends BaseSpecializedAgent {
410410
411411
⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
412412
413+
SEVERITY CLASSIFICATION:
414+
🔴 CRITICAL: SQL injection, command injection, RCE, auth bypass, hardcoded credentials, data loss
415+
🟠 HIGH: Potential bugs (NPE, resource leaks), security weaknesses, crypto issues
416+
🟡 MEDIUM: Code smells, maintainability issues, moderate complexity
417+
🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
418+
413419
Output ONLY this JSON (nothing else):
414420
{
415421
"severity": "critical|high|medium|low",
@@ -519,6 +525,12 @@ export class PerformanceAgent extends BaseSpecializedAgent {
519525
520526
⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
521527
528+
SEVERITY CLASSIFICATION:
529+
🔴 CRITICAL: System crashes, memory leaks causing outages, infinite loops
530+
🟠 HIGH: N² algorithms in hot paths, significant resource waste, scalability blockers
531+
🟡 MEDIUM: Suboptimal algorithms, minor inefficiencies
532+
🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
533+
522534
Output ONLY this JSON (nothing else):
523535
{
524536
"severity": "critical|high|medium|low",
@@ -557,6 +569,12 @@ export class ArchitectureAgent extends BaseSpecializedAgent {
557569
558570
⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
559571
572+
SEVERITY CLASSIFICATION:
573+
🔴 CRITICAL: Circular dependencies breaking builds, major SOLID violations causing outages
574+
🟠 HIGH: God classes (1000+ lines), tight coupling blocking features
575+
🟡 MEDIUM: Minor design smells, moderate complexity
576+
🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
577+
560578
Output ONLY this JSON (nothing else):
561579
{
562580
"severity": "critical|high|medium|low",
@@ -623,6 +641,14 @@ export class CodeQualityAgent extends BaseSpecializedAgent {
623641
624642
⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
625643
644+
SEVERITY CLASSIFICATION (CRITICAL for CodeQuality):
645+
🔴 CRITICAL: Never for code quality (reserved for security/crashes)
646+
🟠 HIGH: Logic bugs, potential NPE, incorrect exception handling
647+
🟡 MEDIUM: Complexity warnings, code duplication, refactoring candidates
648+
🟢 LOW: Style/formatting/documentation/naming (checkstyle/PMD naming rules = LOW)
649+
650+
⚠️ CHECKSTYLE = LOW (99.9%): DesignForExtensionCheck, naming conventions, line length, imports, Javadoc
651+
626652
Output ONLY this JSON (nothing else):
627653
{
628654
"severity": "critical|high|medium|low",
@@ -887,6 +913,12 @@ export class DependencyAgent extends BaseSpecializedAgent {
887913
888914
⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
889915
916+
SEVERITY CLASSIFICATION:
917+
🔴 CRITICAL: Known CVE with exploit code, RCE vulnerabilities, authentication bypass
918+
🟠 HIGH: CVEs without public exploit, deprecated packages, security weaknesses
919+
🟡 MEDIUM: Outdated dependencies, minor vulnerabilities
920+
🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
921+
890922
Output ONLY this JSON (nothing else):
891923
{
892924
"severity": "critical|high|medium|low",

packages/agents/src/two-branch/analyzers/v9-grouped-report-formatter.ts

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import {
2626
cleanAIContent,
2727
getUserFriendlyTitle
2828
} from '../report/formatter-utils';
29-
import { getCuratedResourcesForRule, enrichIssuesWithAI, enrichIssuesWithSeverityClassification } from '../report/ai-enrichment';
29+
import { getCuratedResourcesForRule, enrichIssuesWithAI } from '../report/ai-enrichment';
3030
import {
3131
detectCategory,
3232
calculateRiskLevel,
@@ -379,21 +379,19 @@ export class V9GroupedReportFormatter {
379379
// Store repoPath for snippet extraction
380380
this.repoPath = metadata.repoPath || null;
381381

382-
// SESSION 13 FIX #2 (MANDATORY): AI-powered severity classification FIRST
383-
// This re-classifies severity intelligently (e.g., Javadoc HIGH → LOW)
384-
// Cost: ~150 tokens per group = ~$0.0001 per group = ~$0.002 per PR
385-
// This is a CORE FEATURE - always enabled for consistent, high-quality results
386-
// If AI fails, gracefully falls back to original severity (handled in catch blocks)
387-
// SESSION 13 FIX #3 (CONFIG-BASED): Pass modelConfigResolver for config-based Qwen model
388-
const severityClassifiedIssues = await enrichIssuesWithSeverityClassification(issues, groups, this.modelConfigResolver);
382+
// OPTIMIZATION: Severity classification now integrated into specialized agents (saves ~150 tokens per group)
383+
// Each agent classifies severity AS PART of generating fix suggestions (1 AI call instead of 2)
384+
// Cost: ~600 tokens per group = ~$0.0003 per group = ~$0.009 per PR (was ~$0.011 before)
389385

390-
// SESSION 13 FIX #4 (BUG-87): Update group severities based on AI-classified issues
386+
// BUG-76: AI-enrich issues (includes severity classification + fix generation in 1 call)
387+
const enrichedIssues = await this.enrichIssuesWithAI(issues, groups);
388+
389+
// Update group severities based on AI-classified issues
391390
// After AI classification updates individual issue severities, we need to update
392391
// each group's severity to reflect the AI-classified issues (not original severities)
393-
// Match issues to groups by rule + tool (not severity, since it changed)
394392
const updatedGroups = groups.map(group => {
395-
// Find all issues in this group (match by rule + tool, not severity)
396-
const groupIssues = severityClassifiedIssues.filter(issue =>
393+
// Find all issues in this group (match by rule + tool, not severity, since it changed)
394+
const groupIssues = enrichedIssues.filter(issue =>
397395
issue.rule === group.rule && issue.tool === group.tool
398396
);
399397

@@ -418,10 +416,10 @@ export class V9GroupedReportFormatter {
418416
};
419417
});
420418

421-
// SESSION 13 FIX #5 (BUG-88): Recalculate blockingCount after AI severity classification
419+
// Recalculate blockingCount after AI severity classification
422420
// The original blockingCount was calculated before AI changed severities (high → low)
423421
// Now we need to count blocking issues using AI-classified severities
424-
const updatedBlockingCount = severityClassifiedIssues.filter(i =>
422+
const updatedBlockingCount = enrichedIssues.filter(i =>
425423
(i.category === 'NEW' || i.category === 'EXISTING_MODIFIED') &&
426424
(i.severity === 'critical' || i.severity === 'high')
427425
).length;
@@ -432,10 +430,6 @@ export class V9GroupedReportFormatter {
432430
// Also update decision based on updated blocking count
433431
metadata.decision = updatedBlockingCount > 0 ? 'DECLINED' : 'APPROVED';
434432

435-
// BUG-76: AI-enrich issues BEFORE generating report sections
436-
// This runs in parallel and adds fixSuggestion to each issue
437-
const enrichedIssues = await this.enrichIssuesWithAI(severityClassifiedIssues, updatedGroups);
438-
439433
console.log(`\n[DEBUG-PR#] ====== Before generateHeader ======`);
440434
console.log(`[DEBUG-PR#] Passing metadata.prNumber: ${metadata.prNumber}`);
441435
console.log(`[DEBUG-PR#] ====================================\n`);

packages/agents/src/two-branch/analyzers/v9-skill-score-manager.ts

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export interface SkillScoreData {
1818
repository: string; // Will be stored as 'repo_name' in database
1919
prNumber: number;
2020
branch?: string;
21+
commitHash?: string; // BUG #4 FIX: Track commit to prevent duplicate trend entries
2122
overallScore: number;
2223
qualityScore?: number;
2324
categoryScores: {
@@ -84,20 +85,24 @@ export class SkillScoreManager {
8485
/**
8586
* Get score trend (up to N scores, ordered by analyzed_at ascending)
8687
* Returns empty array if no history exists
88+
*
89+
* BUG #4 FIX: Filter duplicate commits to show only unique analysis runs
90+
* Example: 60→30→30→30 becomes 60→30 (removes re-analysis of same commit)
8791
*/
8892
async getScoreTrend(
8993
developerEmail: string,
9094
repository: string,
9195
limit = 5
9296
): Promise<number[]> {
9397
try {
98+
// Fetch more records than needed to account for potential duplicates
9499
const { data, error } = await this.supabase
95100
.from('skill_scores')
96-
.select('overall_score')
101+
.select('overall_score, commit_hash')
97102
.eq('developer_email', developerEmail)
98103
.eq('repo_name', repository) // Fixed: use 'repo_name' column
99104
.order('analyzed_at', { ascending: true })
100-
.limit(limit);
105+
.limit(limit * 2); // Fetch 2x to handle duplicates
101106

102107
if (error) {
103108
console.warn('[SkillScoreManager] Error fetching trend:', error.message);
@@ -108,8 +113,21 @@ export class SkillScoreManager {
108113
return [];
109114
}
110115

111-
const trend = data.map(r => r.overall_score);
112-
console.log(`[SkillScoreManager] Trend for ${developerEmail}: [${trend.join(', ')}]`);
116+
// BUG #4 FIX: Remove duplicate commits, keep only the first (earliest) analysis seen per commit
117+
const seenCommits = new Set<string>();
118+
const uniqueScores: number[] = [];
119+
120+
for (const record of data) {
121+
const commitHash = record.commit_hash || `pr-${Math.random()}`; // Fallback for legacy data
122+
if (!seenCommits.has(commitHash)) {
123+
seenCommits.add(commitHash);
124+
uniqueScores.push(record.overall_score);
125+
if (uniqueScores.length >= limit) break;
126+
}
127+
}
128+
129+
const trend = uniqueScores.slice(0, limit);
130+
console.log(`[SkillScoreManager] Trend for ${developerEmail}: [${trend.join(', ')}] (${data.length - trend.length} duplicates filtered)`);
113131
return trend;
114132
} catch (error) {
115133
console.error('[SkillScoreManager] Unexpected error fetching trend:', error);
@@ -131,6 +149,7 @@ export class SkillScoreManager {
131149
repo_name: scoreData.repository, // Fixed: use 'repo_name' column
132150
pr_number: scoreData.prNumber,
133151
branch: scoreData.branch,
152+
commit_hash: scoreData.commitHash, // BUG #4 FIX: Store commit hash to prevent duplicate trends
134153
overall_score: scoreData.overallScore,
135154
quality_score: scoreData.qualityScore,
136155
security_score: scoreData.categoryScores.security,

packages/agents/src/two-branch/report/ai-enrichment.ts

Lines changed: 5 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,15 @@
44
* Handles AI-powered issue enrichment with fix suggestions AND severity classification.
55
* Extracted from v9-grouped-report-formatter.ts for better modularity.
66
*
7-
* Strategy: 1 AI call per group (cost-optimized)
8-
* Cost: ~600 tokens per group = $0.0003 per group
9-
*
10-
* SESSION 13 FIX #2 (PROPER): Integrated AI Severity Classifier
11-
* - Severity classification happens PER GROUP (not per issue)
12-
* - Uses cheap models for classification (~150 tokens per group)
13-
* - Total cost: ~29 groups × 150 tokens = ~4,350 tokens = ~$0.002
7+
* OPTIMIZATION: Severity classification integrated into specialized agents
8+
* - Each agent classifies severity AS PART of generating fix suggestions
9+
* - 1 AI call per group (was 2 before: classify + enrich)
10+
* - Cost: ~600 tokens per group = $0.0003 per group = ~$0.009 per PR
11+
* - Savings: ~150 tokens per group (was ~$0.011, now ~$0.009)
1412
*/
1513

1614
import { EnrichedIssue } from './types';
1715
import { IssueGroup } from '../utils/issue-grouping';
18-
import {
19-
classifyIssueSeverity,
20-
type Severity,
21-
type SeverityClassificationInput
22-
} from '../services/ai-severity-classifier';
2316

2417
/**
2518
* Get curated educational resources for specific rules
@@ -54,108 +47,6 @@ export function getCuratedResourcesForRule(ruleId: string): Array<{ title: strin
5447
return map[normalized] || [];
5548
}
5649

57-
/**
58-
* SESSION 13 FIX #2 (PROPER): AI-powered severity classification
59-
*
60-
* Re-classifies issue severity intelligently using AI, per group.
61-
* This replaces the hardcoded severity mapping approach.
62-
*
63-
* Strategy:
64-
* - Classify ONE representative issue per group
65-
* - Apply the classified severity to ALL issues in that group
66-
* - Cost-optimized: ~150 tokens per group = ~$0.0001 per group
67-
*
68-
* @param issues - All issues to re-classify
69-
* @param groups - Issue groups for efficient processing
70-
* @param modelConfigResolver - Model configuration resolver (from Supabase)
71-
* @returns Issues with AI-classified severity
72-
*/
73-
export async function enrichIssuesWithSeverityClassification(
74-
issues: EnrichedIssue[],
75-
groups: IssueGroup[],
76-
modelConfigResolver: any | null
77-
): Promise<EnrichedIssue[]> {
78-
// SESSION 13 FIX #2 (MANDATORY): AI severity classification is now always enabled
79-
// This is a core feature that provides intelligent severity analysis
80-
// If AI fails, we gracefully fall back to original severity (handled in catch blocks)
81-
82-
console.log(`[AI Severity] Starting severity classification for ${groups.length} groups...`);
83-
const startTime = Date.now();
84-
85-
try {
86-
// Process groups in parallel (29 groups × ~150 tokens = ~4,350 tokens = ~$0.002)
87-
const classificationPromises = groups.map(async (group) => {
88-
const groupIssues = issues.filter(i =>
89-
i.rule === group.rule && i.tool === group.tool && i.severity === group.severity
90-
);
91-
92-
if (groupIssues.length === 0) return;
93-
94-
// Pick representative issue (first with code snippet)
95-
const representative = groupIssues.find(i => i.snippet) || groupIssues[0];
96-
97-
// Save original severity for comparison
98-
const originalSeverity = representative.severity as Severity;
99-
100-
try {
101-
const classificationInput: SeverityClassificationInput = {
102-
tool: representative.tool,
103-
rule: representative.rule,
104-
originalSeverity,
105-
title: representative.message || representative.rule,
106-
description: representative.message || '',
107-
codeSnippet: representative.snippet
108-
};
109-
110-
// Get model from config resolver (uses Qwen via OpenRouter)
111-
// SESSION 13 FIX #3 (CONFIG-BASED): Use config resolver to get model configuration
112-
// Severity classification doesn't need a specific role, use code_quality as default
113-
let model: string | undefined;
114-
if (modelConfigResolver) {
115-
const modelConfig = await modelConfigResolver.getModelConfiguration(
116-
'code_quality', // Severity classification uses code quality role
117-
'java', // Default to java (works for all languages)
118-
'medium' // Default to medium repo size
119-
);
120-
model = modelConfig.primary_model;
121-
}
122-
123-
// Call AI Severity Classifier with config-based model
124-
const classification = await classifyIssueSeverity(classificationInput, model);
125-
126-
// Apply classified severity to ALL issues in this group
127-
for (const issue of groupIssues) {
128-
issue.severity = classification.severity;
129-
issue.severityReasoning = classification.reasoning;
130-
issue.severityConfidence = classification.confidence;
131-
}
132-
133-
// Log severity changes
134-
if (classification.severity !== originalSeverity) {
135-
console.log(`[AI Severity] ✅ ${group.rule}: ${originalSeverity}${classification.severity} (${classification.confidence} confidence)`);
136-
}
137-
138-
} catch (error: any) {
139-
console.warn(`[AI Severity] ⚠️ Failed for ${group.rule}:`, error.message);
140-
// Keep original severity on error
141-
}
142-
});
143-
144-
await Promise.all(classificationPromises);
145-
146-
const duration = Date.now() - startTime;
147-
const reclassifiedCount = issues.filter(i => i.severityReasoning).length;
148-
console.log(`[AI Severity] Completed: ${reclassifiedCount}/${issues.length} issues re-classified in ${duration}ms`);
149-
150-
return issues;
151-
152-
} catch (error: any) {
153-
console.error('[AI Severity] Fatal error:', error.message);
154-
// Return issues with original severity
155-
return issues;
156-
}
157-
}
158-
15950
/**
16051
* Enrich issues with AI-generated fix suggestions
16152
*

packages/agents/src/two-branch/report/business-impact.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,11 +219,12 @@ ${autoFixableBlockingCount} of ${blocking.length} blocking issues (${autoFixPerc
219219
220220
| Metric | Value |
221221
|--------|-------|
222-
| **Manual Fix Cost** | **$${totalFixCost.toLocaleString()}** (${baseFixHours.toFixed(1)} hours - minimal, mostly for review/testing) |
222+
| **Auto-Fix Time** | **${Math.ceil(autoFixableBlockingCount / 100)} minutes** (run formatters + linters) |
223+
| **Review Time** | **${baseFixHours.toFixed(1)} hours** (${baseFixHours.toFixed(1)}h × $${developerRate}/h = $${totalFixCost.toLocaleString()}) |
223224
| **Auto-Fix Coverage** | **${autoFixPercentage.toFixed(0)}%** of blocking issues |
224-
| **Recommendation** | Run IDE auto-fix + code formatter, then review changes |
225+
| **Recommendation** | Run IDE auto-fix + code formatter, then code review changes |
225226
226-
**Note:** Most issues are auto-fixable (LineLength, MissingJavadoc, Whitespace). The cost shown reflects review time, not manual coding.`
227+
**Note:** Auto-fix takes minutes to run. Review time ($${totalFixCost.toLocaleString()}) covers code review of auto-generated changes, NOT manual coding.`
227228
: `| Metric | Value |
228229
|--------|-------|
229230
| **Total Fix Cost** | **$${totalFixCost.toLocaleString()}** (${baseFixHours.toFixed(1)} hours, ~${fixDays} developer-days at $${developerRate}/hour) |

packages/agents/src/two-branch/report/metadata-footer.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,24 @@ export function generateAnalysisMetadata(
8181
// Add Agent Performance if available (optional)
8282
if (showAgentPerformance && metadata.agentPerformance && Array.isArray(metadata.agentPerformance) && metadata.agentPerformance.length > 0) {
8383
content += `\n### Agent Performance
84-
| Agent | Files Analyzed | Issues Found | Time | Cost |
85-
|-------|----------------|--------------|------|------|
84+
| Agent | Files Analyzed | Issues Found | Time | Cost | Model |
85+
|-------|----------------|--------------|------|------|-------|
8686
`;
8787
metadata.agentPerformance.forEach((agent: any) => {
8888
const issues = agent.issuesFound || agent.issues || 0;
8989
const time = agent.duration ? (agent.duration / 1000).toFixed(1) + 's' : 'N/A';
9090
const cost = agent.cost ? '$' + agent.cost.toFixed(4) : (issues === 0 ? 'N/A' : '$0.0000');
91-
content += `| ${agent.name || agent.agent} | ${agent.filesAnalyzed || agent.files || 'N/A'} | ${issues} | ${time} | ${cost} |\n`;
91+
// BUG #3 FIX: Extract model name from modelUsed object or fallback to direct properties
92+
let model = 'N/A';
93+
if (agent.modelUsed) {
94+
// Model is in object format: { provider, model, temperature }
95+
model = agent.modelUsed.model || agent.modelUsed.provider || 'N/A';
96+
} else if (agent.model) {
97+
model = agent.model;
98+
} else if (agent.modelName) {
99+
model = agent.modelName;
100+
}
101+
content += `| ${agent.name || agent.agent} | ${agent.filesAnalyzed || agent.files || 'N/A'} | ${issues} | ${time} | ${cost} | ${model} |\n`;
92102
});
93103
}
94104

0 commit comments

Comments
 (0)