Merge pull request #52 from alpsla/fix/report-quality-4-bugs

alpsla · web-flow · commit d91b3fb34b4d · 2025-10-31T14:49:04.000-04:00
Fix/report quality 4 bugs
diff --git a/packages/agents/src/two-branch/agents/specialized-agents.ts b/packages/agents/src/two-branch/agents/specialized-agents.ts
@@ -410,6 +410,12 @@ export class SecurityAgent extends BaseSpecializedAgent {
 
 ⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
 
+SEVERITY CLASSIFICATION:
+🔴 CRITICAL: SQL injection, command injection, RCE, auth bypass, hardcoded credentials, data loss
+🟠 HIGH: Potential bugs (NPE, resource leaks), security weaknesses, crypto issues
+🟡 MEDIUM: Code smells, maintainability issues, moderate complexity
+🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
+
 Output ONLY this JSON (nothing else):
 {
   "severity": "critical|high|medium|low",
@@ -519,6 +525,12 @@ export class PerformanceAgent extends BaseSpecializedAgent {
 
 ⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
 
+SEVERITY CLASSIFICATION:
+🔴 CRITICAL: System crashes, memory leaks causing outages, infinite loops
+🟠 HIGH: N² algorithms in hot paths, significant resource waste, scalability blockers
+🟡 MEDIUM: Suboptimal algorithms, minor inefficiencies
+🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
+
 Output ONLY this JSON (nothing else):
 {
   "severity": "critical|high|medium|low",
@@ -557,6 +569,12 @@ export class ArchitectureAgent extends BaseSpecializedAgent {
 
 ⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
 
+SEVERITY CLASSIFICATION:
+🔴 CRITICAL: Circular dependencies breaking builds, major SOLID violations causing outages
+🟠 HIGH: God classes (1000+ lines), tight coupling blocking features
+🟡 MEDIUM: Minor design smells, moderate complexity
+🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
+
 Output ONLY this JSON (nothing else):
 {
   "severity": "critical|high|medium|low",
@@ -623,6 +641,14 @@ export class CodeQualityAgent extends BaseSpecializedAgent {
 
 ⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
 
+SEVERITY CLASSIFICATION (CRITICAL for CodeQuality):
+🔴 CRITICAL: Never for code quality (reserved for security/crashes)
+🟠 HIGH: Logic bugs, potential NPE, incorrect exception handling
+🟡 MEDIUM: Complexity warnings, code duplication, refactoring candidates
+🟢 LOW: Style/formatting/documentation/naming (checkstyle/PMD naming rules = LOW)
+
+⚠️ CHECKSTYLE = LOW (99.9%): DesignForExtensionCheck, naming conventions, line length, imports, Javadoc
+
 Output ONLY this JSON (nothing else):
 {
   "severity": "critical|high|medium|low",
@@ -887,6 +913,12 @@ export class DependencyAgent extends BaseSpecializedAgent {
 
 ⚠️ CRITICAL: Output ONLY the JSON response. NO thinking process, NO reasoning, NO "First, I...", NO "Let me...". Start DIRECTLY with JSON.
 
+SEVERITY CLASSIFICATION:
+🔴 CRITICAL: Known CVE with exploit code, RCE vulnerabilities, authentication bypass
+🟠 HIGH: CVEs without public exploit, deprecated packages, security weaknesses
+🟡 MEDIUM: Outdated dependencies, minor vulnerabilities
+🟢 LOW: Style/formatting/documentation (checkstyle=LOW 99.9% of cases)
+
 Output ONLY this JSON (nothing else):
 {
   "severity": "critical|high|medium|low",
diff --git a/packages/agents/src/two-branch/analyzers/v9-grouped-report-formatter.ts b/packages/agents/src/two-branch/analyzers/v9-grouped-report-formatter.ts
@@ -26,7 +26,7 @@ import {
   cleanAIContent,
   getUserFriendlyTitle
 } from '../report/formatter-utils';
-import { getCuratedResourcesForRule, enrichIssuesWithAI, enrichIssuesWithSeverityClassification } from '../report/ai-enrichment';
+import { getCuratedResourcesForRule, enrichIssuesWithAI } from '../report/ai-enrichment';
 import {
   detectCategory,
   calculateRiskLevel,
@@ -379,21 +379,19 @@ export class V9GroupedReportFormatter {
     // Store repoPath for snippet extraction
     this.repoPath = metadata.repoPath || null;
 
-    // SESSION 13 FIX #2 (MANDATORY): AI-powered severity classification FIRST
-    // This re-classifies severity intelligently (e.g., Javadoc HIGH → LOW)
-    // Cost: ~150 tokens per group = ~$0.0001 per group = ~$0.002 per PR
-    // This is a CORE FEATURE - always enabled for consistent, high-quality results
-    // If AI fails, gracefully falls back to original severity (handled in catch blocks)
-    // SESSION 13 FIX #3 (CONFIG-BASED): Pass modelConfigResolver for config-based Qwen model
-    const severityClassifiedIssues = await enrichIssuesWithSeverityClassification(issues, groups, this.modelConfigResolver);
+    // OPTIMIZATION: Severity classification now integrated into specialized agents (saves ~150 tokens per group)
+    // Each agent classifies severity AS PART of generating fix suggestions (1 AI call instead of 2)
+    // Cost: ~600 tokens per group = ~$0.0003 per group = ~$0.009 per PR (was ~$0.011 before)
 
-    // SESSION 13 FIX #4 (BUG-87): Update group severities based on AI-classified issues
+    // BUG-76: AI-enrich issues (includes severity classification + fix generation in 1 call)
+    const enrichedIssues = await this.enrichIssuesWithAI(issues, groups);
+
+    // Update group severities based on AI-classified issues
     // After AI classification updates individual issue severities, we need to update
     // each group's severity to reflect the AI-classified issues (not original severities)
-    // Match issues to groups by rule + tool (not severity, since it changed)
     const updatedGroups = groups.map(group => {
-      // Find all issues in this group (match by rule + tool, not severity)
-      const groupIssues = severityClassifiedIssues.filter(issue =>
+      // Find all issues in this group (match by rule + tool only; severity is excluded because enrichment may have changed it)
+      const groupIssues = enrichedIssues.filter(issue =>
         issue.rule === group.rule && issue.tool === group.tool
       );
 
@@ -418,10 +416,10 @@ export class V9GroupedReportFormatter {
       };
     });
 
-    // SESSION 13 FIX #5 (BUG-88): Recalculate blockingCount after AI severity classification
+    // Recalculate blockingCount after AI severity classification
     // The original blockingCount was calculated before AI changed severities (high → low)
     // Now we need to count blocking issues using AI-classified severities
-    const updatedBlockingCount = severityClassifiedIssues.filter(i =>
+    const updatedBlockingCount = enrichedIssues.filter(i =>
       (i.category === 'NEW' || i.category === 'EXISTING_MODIFIED') &&
       (i.severity === 'critical' || i.severity === 'high')
     ).length;
@@ -432,10 +430,6 @@ export class V9GroupedReportFormatter {
     // Also update decision based on updated blocking count
     metadata.decision = updatedBlockingCount > 0 ? 'DECLINED' : 'APPROVED';
 
-    // BUG-76: AI-enrich issues BEFORE generating report sections
-    // This runs in parallel and adds fixSuggestion to each issue
-    const enrichedIssues = await this.enrichIssuesWithAI(severityClassifiedIssues, updatedGroups);
-
     console.log(`\n[DEBUG-PR#] ====== Before generateHeader ======`);
     console.log(`[DEBUG-PR#] Passing metadata.prNumber: ${metadata.prNumber}`);
     console.log(`[DEBUG-PR#] ====================================\n`);
diff --git a/packages/agents/src/two-branch/analyzers/v9-skill-score-manager.ts b/packages/agents/src/two-branch/analyzers/v9-skill-score-manager.ts
@@ -18,6 +18,7 @@ export interface SkillScoreData {
   repository: string;  // Will be stored as 'repo_name' in database
   prNumber: number;
   branch?: string;
+  commitHash?: string;  // BUG #4 FIX: Track commit to prevent duplicate trend entries
   overallScore: number;
   qualityScore?: number;
   categoryScores: {
@@ -84,20 +85,24 @@ export class SkillScoreManager {
   /**
    * Get score trend (last N scores)
    * Returns empty array if no history exists
+   *
+   * BUG #4 FIX: Filter duplicate commits to show only unique analysis runs
+   * Example: 60→30→30→30 becomes 60→30 (removes re-analysis of same commit)
    */
   async getScoreTrend(
     developerEmail: string,
     repository: string,
     limit = 5
   ): Promise<number[]> {
     try {
+      // Fetch more records than needed to account for potential duplicates
       const { data, error } = await this.supabase
         .from('skill_scores')
-        .select('overall_score')
+        .select('overall_score, commit_hash')
         .eq('developer_email', developerEmail)
         .eq('repo_name', repository)  // Fixed: use 'repo_name' column
         .order('analyzed_at', { ascending: true })
-        .limit(limit);
+        .limit(limit * 2);  // Fetch 2x as headroom for duplicates (heuristic: may still yield fewer than `limit` unique scores)
 
       if (error) {
         console.warn('[SkillScoreManager] Error fetching trend:', error.message);
@@ -108,8 +113,21 @@ export class SkillScoreManager {
         return [];
       }
 
-      const trend = data.map(r => r.overall_score);
-      console.log(`[SkillScoreManager] Trend for ${developerEmail}: [${trend.join(', ')}]`);
+      // BUG #4 FIX: Remove duplicate commits, keeping the first record seen per commit (the earliest, since rows are ordered by analyzed_at ascending)
+      const seenCommits = new Set<string>();
+      const uniqueScores: number[] = [];
+
+      for (const record of data) {
+        const commitHash = record.commit_hash || `pr-${Math.random()}`; // Legacy rows without a commit hash get a random key so they are effectively never treated as duplicates
+        if (!seenCommits.has(commitHash)) {
+          seenCommits.add(commitHash);
+          uniqueScores.push(record.overall_score);
+          if (uniqueScores.length >= limit) break;
+        }
+      }
+
+      const trend = uniqueScores.slice(0, limit);
+      console.log(`[SkillScoreManager] Trend for ${developerEmail}: [${trend.join(', ')}] (${data.length - trend.length} duplicates filtered)`);
       return trend;
     } catch (error) {
       console.error('[SkillScoreManager] Unexpected error fetching trend:', error);
@@ -131,6 +149,7 @@ export class SkillScoreManager {
         repo_name: scoreData.repository,  // Fixed: use 'repo_name' column
         pr_number: scoreData.prNumber,
         branch: scoreData.branch,
+        commit_hash: scoreData.commitHash,  // BUG #4 FIX: Store commit hash to prevent duplicate trends
         overall_score: scoreData.overallScore,
         quality_score: scoreData.qualityScore,
         security_score: scoreData.categoryScores.security,
diff --git a/packages/agents/src/two-branch/report/ai-enrichment.ts b/packages/agents/src/two-branch/report/ai-enrichment.ts
@@ -4,22 +4,15 @@
  * Handles AI-powered issue enrichment with fix suggestions AND severity classification.
  * Extracted from v9-grouped-report-formatter.ts for better modularity.
  *
- * Strategy: 1 AI call per group (cost-optimized)
- * Cost: ~600 tokens per group = $0.0003 per group
- *
- * SESSION 13 FIX #2 (PROPER): Integrated AI Severity Classifier
- * - Severity classification happens PER GROUP (not per issue)
- * - Uses cheap models for classification (~150 tokens per group)
- * - Total cost: ~29 groups × 150 tokens = ~4,350 tokens = ~$0.002
+ * OPTIMIZATION: Severity classification integrated into specialized agents
+ * - Each agent classifies severity AS PART of generating fix suggestions
+ * - 1 AI call per group (was 2 before: classify + enrich)
+ * - Cost: ~600 tokens per group = $0.0003 per group = ~$0.009 per PR
+ * - Savings: ~150 tokens per group (was ~$0.011, now ~$0.009)
  */
 
 import { EnrichedIssue } from './types';
 import { IssueGroup } from '../utils/issue-grouping';
-import {
-  classifyIssueSeverity,
-  type Severity,
-  type SeverityClassificationInput
-} from '../services/ai-severity-classifier';
 
 /**
  * Get curated educational resources for specific rules
@@ -54,108 +47,6 @@ export function getCuratedResourcesForRule(ruleId: string): Array<{ title: strin
   return map[normalized] || [];
 }
 
-/**
- * SESSION 13 FIX #2 (PROPER): AI-powered severity classification
- *
- * Re-classifies issue severity intelligently using AI, per group.
- * This replaces the hardcoded severity mapping approach.
- *
- * Strategy:
- * - Classify ONE representative issue per group
- * - Apply the classified severity to ALL issues in that group
- * - Cost-optimized: ~150 tokens per group = ~$0.0001 per group
- *
- * @param issues - All issues to re-classify
- * @param groups - Issue groups for efficient processing
- * @param modelConfigResolver - Model configuration resolver (from Supabase)
- * @returns Issues with AI-classified severity
- */
-export async function enrichIssuesWithSeverityClassification(
-  issues: EnrichedIssue[],
-  groups: IssueGroup[],
-  modelConfigResolver: any | null
-): Promise<EnrichedIssue[]> {
-  // SESSION 13 FIX #2 (MANDATORY): AI severity classification is now always enabled
-  // This is a core feature that provides intelligent severity analysis
-  // If AI fails, we gracefully fall back to original severity (handled in catch blocks)
-
-  console.log(`[AI Severity] Starting severity classification for ${groups.length} groups...`);
-  const startTime = Date.now();
-
-  try {
-    // Process groups in parallel (29 groups × ~150 tokens = ~4,350 tokens = ~$0.002)
-    const classificationPromises = groups.map(async (group) => {
-      const groupIssues = issues.filter(i =>
-        i.rule === group.rule && i.tool === group.tool && i.severity === group.severity
-      );
-
-      if (groupIssues.length === 0) return;
-
-      // Pick representative issue (first with code snippet)
-      const representative = groupIssues.find(i => i.snippet) || groupIssues[0];
-
-      // Save original severity for comparison
-      const originalSeverity = representative.severity as Severity;
-
-      try {
-        const classificationInput: SeverityClassificationInput = {
-          tool: representative.tool,
-          rule: representative.rule,
-          originalSeverity,
-          title: representative.message || representative.rule,
-          description: representative.message || '',
-          codeSnippet: representative.snippet
-        };
-
-        // Get model from config resolver (uses Qwen via OpenRouter)
-        // SESSION 13 FIX #3 (CONFIG-BASED): Use config resolver to get model configuration
-        // Severity classification doesn't need a specific role, use code_quality as default
-        let model: string | undefined;
-        if (modelConfigResolver) {
-          const modelConfig = await modelConfigResolver.getModelConfiguration(
-            'code_quality', // Severity classification uses code quality role
-            'java',        // Default to java (works for all languages)
-            'medium'       // Default to medium repo size
-          );
-          model = modelConfig.primary_model;
-        }
-
-        // Call AI Severity Classifier with config-based model
-        const classification = await classifyIssueSeverity(classificationInput, model);
-
-        // Apply classified severity to ALL issues in this group
-        for (const issue of groupIssues) {
-          issue.severity = classification.severity;
-          issue.severityReasoning = classification.reasoning;
-          issue.severityConfidence = classification.confidence;
-        }
-
-        // Log severity changes
-        if (classification.severity !== originalSeverity) {
-          console.log(`[AI Severity] ✅ ${group.rule}: ${originalSeverity} → ${classification.severity} (${classification.confidence} confidence)`);
-        }
-
-      } catch (error: any) {
-        console.warn(`[AI Severity] ⚠️  Failed for ${group.rule}:`, error.message);
-        // Keep original severity on error
-      }
-    });
-
-    await Promise.all(classificationPromises);
-
-    const duration = Date.now() - startTime;
-    const reclassifiedCount = issues.filter(i => i.severityReasoning).length;
-    console.log(`[AI Severity] Completed: ${reclassifiedCount}/${issues.length} issues re-classified in ${duration}ms`);
-
-    return issues;
-
-  } catch (error: any) {
-    console.error('[AI Severity] Fatal error:', error.message);
-    // Return issues with original severity
-    return issues;
-  }
-}
-
 /**
  * Enrich issues with AI-generated fix suggestions
  * 
diff --git a/packages/agents/src/two-branch/report/business-impact.ts b/packages/agents/src/two-branch/report/business-impact.ts
@@ -219,11 +219,12 @@ ${autoFixableBlockingCount} of ${blocking.length} blocking issues (${autoFixPerc
 
 | Metric | Value |
 |--------|-------|
-| **Manual Fix Cost** | **$${totalFixCost.toLocaleString()}** (${baseFixHours.toFixed(1)} hours - minimal, mostly for review/testing) |
+| **Auto-Fix Time** | **${Math.ceil(autoFixableBlockingCount / 100)} minutes** (run formatters + linters) |
+| **Review Time** | **${baseFixHours.toFixed(1)} hours** (${baseFixHours.toFixed(1)}h × $${developerRate}/h = $${totalFixCost.toLocaleString()}) |
 | **Auto-Fix Coverage** | **${autoFixPercentage.toFixed(0)}%** of blocking issues |
-| **Recommendation** | Run IDE auto-fix + code formatter, then review changes |
+| **Recommendation** | Run IDE auto-fix + code formatter, then code review changes |
 
-**Note:** Most issues are auto-fixable (LineLength, MissingJavadoc, Whitespace). The cost shown reflects review time, not manual coding.`
+**Note:** Auto-fix takes minutes to run. Review time ($${totalFixCost.toLocaleString()}) covers code review of auto-generated changes, NOT manual coding.`
     : `| Metric | Value |
 |--------|-------|
 | **Total Fix Cost** | **$${totalFixCost.toLocaleString()}** (${baseFixHours.toFixed(1)} hours, ~${fixDays} developer-days at $${developerRate}/hour) |
diff --git a/packages/agents/src/two-branch/report/metadata-footer.ts b/packages/agents/src/two-branch/report/metadata-footer.ts
@@ -81,14 +81,24 @@ export function generateAnalysisMetadata(
   // Add Agent Performance if available (optional)
   if (showAgentPerformance && metadata.agentPerformance && Array.isArray(metadata.agentPerformance) && metadata.agentPerformance.length > 0) {
     content += `\n### Agent Performance
-| Agent | Files Analyzed | Issues Found | Time | Cost |
-|-------|----------------|--------------|------|------|
+| Agent | Files Analyzed | Issues Found | Time | Cost | Model |
+|-------|----------------|--------------|------|------|-------|
 `;
     metadata.agentPerformance.forEach((agent: any) => {
       const issues = agent.issuesFound || agent.issues || 0;
       const time = agent.duration ? (agent.duration / 1000).toFixed(1) + 's' : 'N/A';
       const cost = agent.cost ? '$' + agent.cost.toFixed(4) : (issues === 0 ? 'N/A' : '$0.0000');
-      content += `| ${agent.name || agent.agent} | ${agent.filesAnalyzed || agent.files || 'N/A'} | ${issues} | ${time} | ${cost} |\n`;
+      // BUG #3 FIX: Extract model name from modelUsed object or fallback to direct properties
+      let model = 'N/A';
+      if (agent.modelUsed) {
+        // Model is in object format: { provider, model, temperature }
+        model = agent.modelUsed.model || agent.modelUsed.provider || 'N/A';
+      } else if (agent.model) {
+        model = agent.model;
+      } else if (agent.modelName) {
+        model = agent.modelName;
+      }
+      content += `| ${agent.name || agent.agent} | ${agent.filesAnalyzed || agent.files || 'N/A'} | ${issues} | ${time} | ${cost} | ${model} |\n`;
     });
   }
 
diff --git a/packages/agents/src/two-branch/services/ai-severity-classifier.ts b/packages/agents/src/two-branch/services/ai-severity-classifier.ts