MarkusNeusinger
diff --git a/.github/workflows/impl-generate.yml b/.github/workflows/impl-generate.yml
Lines changed: 14 additions & 0 deletions
diff --git a/.github/workflows/impl-repair.yml b/.github/workflows/impl-repair.yml
Lines changed: 14 additions & 0 deletions
diff --git a/.github/workflows/impl-review.yml b/.github/workflows/impl-review.yml
Lines changed: 67 additions & 15 deletions
diff --git a/CLAUDE.md b/CLAUDE.md
Lines changed: 29 additions & 0 deletions
diff --git a/alembic/versions/6345896e2e90_add_extended_review_fields.py b/alembic/versions/6345896e2e90_add_extended_review_fields.py
Lines changed: 45 additions & 0 deletions
diff --git a/app/src/components/FilterBar.tsx b/app/src/components/FilterBar.tsx
Lines changed: 27 additions & 15 deletions
@@ -271,6 +271,13 @@ jobs:
             1. Read `plots/${{ steps.inputs.outputs.specification_id }}/metadata/${{ steps.inputs.outputs.library }}.yaml`
                - Look at `review.strengths` (keep these aspects!)
                - Look at `review.weaknesses` (fix these problems - decide HOW yourself)
+               - Look at `review.image_description` (understand what was generated visually)
+               - Look at `review.criteria_checklist` (see exactly which criteria failed)
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - Check items with `passed: false` - these need fixing
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
             2. Read `plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py`
                - Understand what was done before
                - Keep what worked, fix what didn't
@@ -346,6 +353,13 @@ jobs:
             1. Read `plots/${{ steps.inputs.outputs.specification_id }}/metadata/${{ steps.inputs.outputs.library }}.yaml`
                - Look at `review.strengths` (keep these aspects!)
                - Look at `review.weaknesses` (fix these problems - decide HOW yourself)
+               - Look at `review.image_description` (understand what was generated visually)
+               - Look at `review.criteria_checklist` (see exactly which criteria failed)
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - Check items with `passed: false` - these need fixing
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
             2. Read `plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py`
                - Understand what was done before
                - Keep what worked, fix what didn't
 
@@ -127,6 +127,13 @@ jobs:
             2. `plots/${{ inputs.specification_id }}/metadata/${{ inputs.library }}.yaml` - Look at:
                - `review.strengths` (keep these aspects!)
                - `review.weaknesses` (fix these problems - decide HOW yourself)
+               - `review.image_description` (understand what was generated visually)
+               - `review.criteria_checklist` (see exactly which criteria failed)
+                 - Look for items with `passed: false` - these need fixing
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
 
             ### Step 2: Read reference files
             1. `prompts/library/${{ inputs.library }}.md` - Library-specific rules
@@ -192,6 +199,13 @@ jobs:
             2. `plots/${{ inputs.specification_id }}/metadata/${{ inputs.library }}.yaml` - Look at:
                - `review.strengths` (keep these aspects!)
                - `review.weaknesses` (fix these problems - decide HOW yourself)
+               - `review.image_description` (understand what was generated visually)
+               - `review.criteria_checklist` (see exactly which criteria failed)
+                 - Look for items with `passed: false` - these need fixing
+                 - Focus on categories with low scores (e.g., visual_quality.score < visual_quality.max)
+                 - VQ-XX items for visual issues
+                 - SC-XX items for spec compliance
+                 - CQ-XX items for code quality
 
             ### Step 2: Read reference files
             1. `prompts/library/${{ inputs.library }}.md` - Library-specific rules
 
@@ -206,12 +206,39 @@ jobs:
                # Save structured feedback as JSON (one array per file)
                echo '["Strength 1", "Strength 2"]' > review_strengths.json
                echo '["Weakness 1"]' > review_weaknesses.json
+
+               # Save verdict
+               echo "APPROVED" > review_verdict.txt  # or "REJECTED"
+
+               # Save image description (multi-line text)
+               cat > review_image_description.txt << 'EOF'
+               The plot shows a scatter plot with blue markers...
+               [Your full image description here]
+               EOF
+
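+               # Save criteria checklist as structured JSON.
+               # The file must be valid JSON: replace every [...] placeholder below with the
+               # full list of item objects (same shape as the visual_quality items).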
+               cat > review_checklist.json << 'EOF'
+               {
+                 "visual_quality": {
+                   "score": 36,
+                   "max": 40,
+                   "items": [
+                     {"id": "VQ-01", "name": "Text Legibility", "score": 10, "max": 10, "passed": true, "comment": "All text readable"},
+                     {"id": "VQ-02", "name": "No Overlap", "score": 8, "max": 8, "passed": true, "comment": "No overlapping elements"}
+                   ]
+                 },
+                 "spec_compliance": {"score": 23, "max": 25, "items": [...]},
+                 "data_quality": {"score": 18, "max": 20, "items": [...]},
+                 "code_quality": {"score": 10, "max": 10, "items": [...]},
+                 "library_features": {"score": 5, "max": 5, "items": [...]}
+               }
+               EOF
                ```
 
             8. **DO NOT add ai-approved or ai-rejected labels** - the workflow will add them after updating metadata.
 
             **IMPORTANT**: Your review MUST include the "Image Description" section. A review without an image description will be considered invalid.
-            **IMPORTANT**: The Strengths/Weaknesses sections are saved to the metadata for future regeneration. Be specific!
+            **IMPORTANT**: All review data (strengths, weaknesses, image_description, criteria_checklist, verdict) is saved to metadata for future regeneration. Be specific!
 
       - name: Extract quality score
         id: score
@@ -266,21 +293,8 @@ jobs:
           git fetch origin "$BRANCH"
           git checkout -B "$BRANCH" "origin/$BRANCH"
 
-          # Read review feedback from JSON files (created by Claude)
-          STRENGTHS="[]"
-          WEAKNESSES="[]"
-
-          if [ -f "review_strengths.json" ]; then
-            STRENGTHS=$(cat review_strengths.json)
-          fi
-          if [ -f "review_weaknesses.json" ]; then
-            WEAKNESSES=$(cat review_weaknesses.json)
-          fi
-
           # Update metadata file with quality score, timestamp, and review feedback
           if [ -f "$METADATA_FILE" ]; then
-            # Update all metadata using Python for proper YAML handling
-            # Pass JSON via files to avoid shell escaping issues with quotes
             TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
 
             # Write Python script to temp file to avoid YAML/shell escaping issues
@@ -294,8 +308,12 @@ jobs:
           score = int(sys.argv[2])
           timestamp = sys.argv[3]
 
+          # Read existing review data files
           strengths = []
           weaknesses = []
+          image_description = None
+          criteria_checklist = None
+          verdict = None
 
           if Path('review_strengths.json').exists():
               try:
@@ -311,6 +329,28 @@ jobs:
               except:
                   pass
 
+          if Path('review_image_description.txt').exists():
+              try:
+                  with open('review_image_description.txt') as f:
+                      image_description = f.read().strip()
+              except:
+                  pass
+
+          if Path('review_checklist.json').exists():
+              try:
+                  with open('review_checklist.json') as f:
+                      criteria_checklist = json.load(f)
+              except:
+                  pass
+
+          if Path('review_verdict.txt').exists():
+              try:
+                  with open('review_verdict.txt') as f:
+                      verdict = f.read().strip()
+              except:
+                  pass
+
+          # Load existing metadata
           with open(metadata_file, 'r') as f:
               data = yaml.safe_load(f)
 
@@ -320,12 +360,24 @@ jobs:
           if 'review' not in data:
               data['review'] = {}
 
+          # Update review section with all fields
           data['review']['strengths'] = strengths
           data['review']['weaknesses'] = weaknesses
 
+          # Add extended review data (issue #2845)
+          if image_description:
+              data['review']['image_description'] = image_description
+          if criteria_checklist:
+              data['review']['criteria_checklist'] = criteria_checklist
+          if verdict:
+              data['review']['verdict'] = verdict
+
           def str_representer(dumper, data):
               if isinstance(data, str) and data.endswith('Z') and 'T' in data:
                   return dumper.represent_scalar('tag:yaml.org,2002:str', data, style="'")
+              # Use literal block style for multi-line strings (image_description)
+              if isinstance(data, str) and '\n' in data:
+                  return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
               return dumper.represent_scalar('tag:yaml.org,2002:str', data)
 
           yaml.add_representer(str, str_representer)
@@ -335,7 +387,7 @@ jobs:
           EOF
 
             python3 /tmp/update_metadata.py "$METADATA_FILE" "$SCORE" "$TIMESTAMP"
-            echo "::notice::Updated metadata with quality score ${SCORE} and review feedback"
+            echo "::notice::Updated metadata with quality score ${SCORE} and extended review data"
           fi
 
           # Update implementation header with quality score
 
@@ -308,6 +308,34 @@ quality_score: 92
 
 # Review feedback (used for regeneration)
 review:
+  # AI's visual description of the generated plot
+  image_description: |
+    The plot shows a scatter plot with 100 data points displaying
+    a positive correlation. Points are rendered in blue with 70%
+    opacity. Axes are clearly labeled and a subtle grid is visible.
+
+  # Detailed scoring breakdown by category
+  criteria_checklist:
+    visual_quality:
+      score: 36
+      max: 40
+      items:
+        - id: VQ-01
+          name: Text Legibility
+          score: 10
+          max: 10
+          passed: true
+          comment: "All text readable at full size"
+    spec_compliance:
+      score: 23
+      max: 25
+      items: [...]
+    # ... data_quality, code_quality, library_features
+
+  # Final verdict
+  verdict: APPROVED
+
+  # Summary feedback
   strengths:
     - "Clean code structure"
     - "Good use of alpha for overlapping points"
@@ -329,6 +357,7 @@ Quality: 92/100 | Created: 2025-01-10
 - Spec-level tracking in `specification.yaml`: `created`, `updated`, `issue`, `suggested`, `tags`
 - Per-library metadata in separate files (no merge conflicts!)
 - **Review feedback** stored in metadata for regeneration (AI reads previous feedback to improve)
+- **Extended review data**: `image_description`, `criteria_checklist`, and `verdict` for targeted fixes
 - Contributors credited via `suggested` field
 - Tags are at spec level (same for all libraries)
 - Per-library metadata updated automatically by `impl-review.yml` (quality score, review feedback)
 
@@ -0,0 +1,45 @@
+"""add_extended_review_fields
+
+Add extended review data fields to impls table for issue #2845:
+- review_image_description: AI's visual description of the plot
+- review_criteria_checklist: Detailed per-criterion scoring breakdown
+- review_verdict: "APPROVED" or "REJECTED"
+
+Revision ID: 6345896e2e90
+Revises: d0c76553a5cc
+Create Date: 2026-01-01
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision: str = "6345896e2e90"
+down_revision: Union[str, None] = "d0c76553a5cc"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add extended review data columns to impls table."""
+    # Add review_image_description (text field for AI's visual description)
+    op.add_column("impls", sa.Column("review_image_description", sa.Text(), nullable=True))
+
+    # Add review_criteria_checklist (JSONB for detailed scoring breakdown)
+    op.add_column("impls", sa.Column("review_criteria_checklist", postgresql.JSONB(), nullable=True))
+
+    # Add review_verdict (short string: "APPROVED" or "REJECTED")
+    op.add_column("impls", sa.Column("review_verdict", sa.String(20), nullable=True))
+
+
+def downgrade() -> None:
+    """Remove extended review data columns from impls table."""
+    op.drop_column("impls", "review_verdict")
+    op.drop_column("impls", "review_criteria_checklist")
+    op.drop_column("impls", "review_image_description")
@@ -139,8 +139,7 @@ export function FilterBar({
   const handleValueSelect = useCallback(
     (category: FilterCategory, value: string) => {
       onAddFilter(category, value);
-      onTrackEvent('filter_add', { category, value });
-      // Track search if query was used
+      // Track search if query was used (filter changes tracked via pageview)
       if (searchQuery.trim()) {
         onTrackEvent('search', { query: searchQuery.trim(), category });
       }
@@ -169,46 +168,62 @@ export function FilterBar({
   const handleRemoveValue = useCallback(
     (value: string) => {
       if (activeGroupIndex !== null) {
-        const group = activeFilters[activeGroupIndex];
         onRemoveFilter(activeGroupIndex, value);
-        onTrackEvent('filter_remove', { category: group?.category || '', value });
       }
       setChipMenuAnchor(null);
       setActiveGroupIndex(null);
     },
-    [activeGroupIndex, activeFilters, onRemoveFilter, onTrackEvent]
+    [activeGroupIndex, onRemoveFilter]
   );
 
   // Remove entire group
   const handleRemoveGroup = useCallback(() => {
     if (activeGroupIndex !== null) {
-      const group = activeFilters[activeGroupIndex];
       onRemoveGroup(activeGroupIndex);
-      onTrackEvent('filter_remove_group', { category: group?.category || '' });
     }
     setChipMenuAnchor(null);
     setActiveGroupIndex(null);
-  }, [activeGroupIndex, activeFilters, onRemoveGroup, onTrackEvent]);
+  }, [activeGroupIndex, onRemoveGroup]);
 
   // Add value to existing group (OR)
   const handleAddValueToExistingGroup = useCallback(
     (value: string) => {
       if (activeGroupIndex !== null) {
-        const group = activeFilters[activeGroupIndex];
         onAddValueToGroup(activeGroupIndex, value);
-        onTrackEvent('filter_add_or', { category: group?.category || '', value });
       }
       setChipMenuAnchor(null);
       setActiveGroupIndex(null);
     },
-    [activeGroupIndex, activeFilters, onAddValueToGroup, onTrackEvent]
+    [activeGroupIndex, onAddValueToGroup]
   );
 
   // Memoize search results to avoid recalculating on every render
   const searchResults = useMemo(
     () => getSearchResults(filterCounts, activeFilters, searchQuery, selectedCategory),
     [filterCounts, activeFilters, searchQuery, selectedCategory]
   );
+
+  // Track searches with no results (debounced, to discover missing specs)
+  const lastTrackedQueryRef = useRef<string>('');
+  useEffect(() => {
+    const query = searchQuery.trim();
+    // Only track if: query >= 2 chars, no results, not already tracked this query
+    if (query.length >= 2 && searchResults.length === 0 && query !== lastTrackedQueryRef.current) {
+      const timer = setTimeout(() => {
+        onTrackEvent('search_no_results', { query });
+        lastTrackedQueryRef.current = query;
+      }, 500);
+      return () => clearTimeout(timer);
+    }
+  }, [searchQuery, searchResults.length, onTrackEvent]);
+
+  // Reset tracked query when dropdown closes
+  useEffect(() => {
+    if (!dropdownAnchor) {
+      lastTrackedQueryRef.current = '';
+    }
+  }, [dropdownAnchor]);
+
   // Only open if anchor is valid and in document
   const isDropdownOpen = Boolean(dropdownAnchor) && document.body.contains(dropdownAnchor);
   const hasQuery = searchQuery.trim().length > 0;
@@ -349,10 +364,7 @@ export function FilterBar({
             key={`${group.category}-${index}`}
             label={displayLabel}
             onClick={(e) => handleChipClick(e, index)}
-            onDelete={() => {
-              onRemoveGroup(index);
-              onTrackEvent('filter_remove_group', { category: group.category });
-            }}
+            onDelete={() => onRemoveGroup(index)}
             deleteIcon={<CloseIcon sx={{ fontSize: '1rem !important' }} />}
             sx={{
               fontFamily: '"MonoLisa", "MonoLisa Fallback", monospace',